diff --git a/.asf.yaml b/.asf.yaml index d122da1c85c5e..5954246e6890a 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -66,6 +66,9 @@ github: v2-5-stable: required_pull_request_reviews: required_approving_review_count: 1 + v2-6-stable: + required_pull_request_reviews: + required_approving_review_count: 1 collaborators: - auvipy @@ -74,9 +77,7 @@ github: - gmcdonald - mhenc - ferruzzi - - norm - mobuchowski - - hussein-awala notifications: jobs: jobs@airflow.apache.org diff --git a/.github/ISSUE_TEMPLATE/airflow_bug_report.yml b/.github/ISSUE_TEMPLATE/airflow_bug_report.yml index d05fcdf12d9e4..34a5aeecbcf4e 100644 --- a/.github/ISSUE_TEMPLATE/airflow_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/airflow_bug_report.yml @@ -26,6 +26,7 @@ body: multiple: false options: - "2.5.3" + - "2.6.0b1" - "main (development)" - "Other Airflow 2 version (please specify below)" validations: diff --git a/.github/ISSUE_TEMPLATE/airflow_helmchart_bug_report.yml b/.github/ISSUE_TEMPLATE/airflow_helmchart_bug_report.yml index dbbd5620430b9..34c36d45686d1 100644 --- a/.github/ISSUE_TEMPLATE/airflow_helmchart_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/airflow_helmchart_bug_report.yml @@ -28,7 +28,8 @@ body: What Apache Airflow Helm Chart version are you using? multiple: false options: - - "1.8.0 (latest released)" + - "1.9.0 (latest released)" + - "1.8.0" - "1.7.0" - "1.6.0" - "1.5.0" diff --git a/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml b/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml index 2507feafcb677..77514e2e4812e 100644 --- a/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml @@ -75,6 +75,7 @@ body: - neo4j - odbc - openfaas + - openlineage - opsgenie - oracle - pagerduty diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ce450ac234384..173c4121cdbce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -167,6 +167,7 @@ jobs: default-constraints-branch: ${{ steps.selective-checks.outputs.default-constraints-branch }} docs-filter: ${{ steps.selective-checks.outputs.docs-filter }} skip-pre-commits: ${{ steps.selective-checks.outputs.skip-pre-commits }} + helm-test-packages: ${{ steps.selective-checks.outputs.helm-test-packages }} debug-resources: ${{ steps.selective-checks.outputs.debug-resources }} suspended-providers-folders: ${{ steps.selective-checks.outputs.suspended-providers-folders }} source-head-repo: ${{ steps.source-run-info.outputs.source-head-repo }} @@ -735,10 +736,10 @@ jobs: - name: "Prepare airflow package: ${{matrix.package-format}}" run: breeze release-management prepare-airflow-package --version-suffix-for-pypi dev0 - name: "Verify wheel packages with twine" - run: pipx install twine && twine check dist/*.whl + run: pipx install twine --force && twine check dist/*.whl if: matrix.package-format == 'wheel' - name: "Verify sdist packages with twine" - run: pipx install twine && twine check dist/*.tar.gz + run: pipx install twine --force && twine check dist/*.tar.gz if: matrix.package-format == 'sdist' - name: "Test providers issue generation automatically" run: > @@ -753,6 +754,7 @@ jobs: run: | rm -vf dist/apache_airflow-*.whl # remove the provider packages that are not compatible with 2.3 + rm -vf dist/apache_airflow_providers_openlineage*.whl # rm -vf dist/apache_airflow_providers_docker*.whl # pip download --no-deps --dest dist apache-airflow-providers-docker==3.1.0 if: matrix.package-format == 'wheel' @@ -809,9 +811,13 @@ jobs: tests-helm: timeout-minutes: 80 - name: 
"Python unit tests for Helm chart" + name: "Unit tests Helm: ${{matrix.helm-test-package}}" runs-on: "${{needs.build-info.outputs.runs-on}}" needs: [build-info, wait-for-ci-images] + strategy: + fail-fast: false + matrix: + helm-test-package: ${{fromJson(needs.build-info.outputs.helm-test-packages)}} env: RUNS_ON: "${{needs.build-info.outputs.runs-on}}" PARALLEL_TEST_TYPES: "Helm" @@ -834,8 +840,8 @@ jobs: - name: > Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}} uses: ./.github/actions/prepare_breeze_and_image - - name: "Helm Unit Tests" - run: breeze testing helm-tests + - name: "Helm Unit Tests: ${{ matrix.helm-test-package }}" + run: breeze testing helm-tests --helm-test-package "${{ matrix.helm-test-package }}" - name: "Post Helm Tests" uses: ./.github/actions/post_tests @@ -848,6 +854,7 @@ jobs: env: RUNS_ON: "${{needs.build-info.outputs.runs-on}}" BACKEND: sqlite + SUSPENDED_PROVIDERS_FOLDERS: "${{ needs.build-info.outputs.suspended-providers-folders }}" PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" TEST_TYPES: "${{needs.build-info.outputs.parallel-test-types}}" FULL_TESTS_NEEDED: "${{needs.build-info.outputs.full-tests-needed}}" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cc33daf95805b..4a152a6c1dd7a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -160,7 +160,7 @@ repos: entry: ./scripts/ci/pre_commit/pre_commit_update_common_sql_api_stubs.py language: python files: ^scripts/ci/pre_commit/pre_commit_update_common_sql_api\.py|^airflow/providers/common/sql/.*\.pyi?$ - additional_dependencies: ['rich>=12.4.4', 'mypy==1.0.0', 'black==22.12.0', 'jinja2'] + additional_dependencies: ['rich>=12.4.4', 'mypy==1.2.0', 'black==22.12.0', 'jinja2'] pass_filenames: false require_serial: true - id: update-black-version @@ -181,7 +181,7 @@ repos: entry: ruff --fix --no-update-check --force-exclude additional_dependencies: ['ruff==0.0.226'] files: \.pyi?$ - exclude: ^.*/.*_vendor/ + exclude: ^.*/.*_vendor/|^tests/dags/test_imports.py - repo: https://github.com/asottile/blacken-docs rev: 1.13.0 hooks: @@ -852,7 +852,7 @@ repos: ^generated/provider_dependencies.json$ require_serial: true pass_filenames: false - additional_dependencies: ['rich>=12.4.4', 'rich-click>=1.5', 'inputimeout', 'pyyaml'] + additional_dependencies: ['rich>=12.4.4', 'rich-click>=1.5', 'inputimeout', 'pyyaml', 'packaging'] - id: check-example-dags-urls name: Check that example dags url include provider versions entry: ./scripts/ci/pre_commit/pre_commit_update_example_dags_paths.py @@ -905,7 +905,7 @@ repos: language: python entry: ./scripts/ci/pre_commit/pre_commit_mypy.py --namespace-packages files: \.py$ - exclude: ^.*/.*_vendor/|^airflow/migrations|^airflow/providers|^dev|^docs|^provider_packages|^tests/providers|^tests/system/providers + exclude: ^.*/.*_vendor/|^airflow/migrations|^airflow/providers|^dev|^docs|^provider_packages|^tests/providers|^tests/system/providers|^tests/dags/test_imports.py require_serial: true additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml'] - id: mypy-providers diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 7039563af2d64..d8aa4e54d7757 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -618,10 +618,10 @@ devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, druid, elastics facebook, ftp, gcp, gcp_api, github, github_enterprise, google, google_auth, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, 
kubernetes, ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, neo4j, odbc, openfaas, -opsgenie, oracle, otel, pagerduty, pandas, papermill, password, pinot, plexus, postgres, presto, -qds, qubole, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, sftp, singularity, -slack, smtp, snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, -virtualenv, webhdfs, winrm, yandex, zendesk +openlineage, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, pinot, plexus, +postgres, presto, qds, qubole, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, +sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, +trino, vertica, virtualenv, webhdfs, winrm, yandex, zendesk .. END EXTRAS HERE Provider packages diff --git a/Dockerfile b/Dockerfile index afc8dd87cc0ff..025c4e13dc533 100644 --- a/Dockerfile +++ b/Dockerfile @@ -48,7 +48,7 @@ ARG AIRFLOW_VERSION="2.5.3" ARG PYTHON_BASE_IMAGE="python:3.7-slim-bullseye" -ARG AIRFLOW_PIP_VERSION=23.0.1 +ARG AIRFLOW_PIP_VERSION=23.1 ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" ARG AIRFLOW_IMAGE_README_URL="https://raw.githubusercontent.com/apache/airflow/main/docs/docker-stack/README.md" @@ -433,7 +433,7 @@ function common::get_airflow_version_specification() { function common::override_pip_version_if_needed() { if [[ -n ${AIRFLOW_VERSION} ]]; then if [[ ${AIRFLOW_VERSION} =~ ^2\.0.* || ${AIRFLOW_VERSION} =~ ^1\.* ]]; then - export AIRFLOW_PIP_VERSION="23.0.1" + export AIRFLOW_PIP_VERSION="23.1" fi fi } diff --git a/Dockerfile.ci b/Dockerfile.ci index bd5f476ca3aa0..c3613ccc269a4 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -393,7 +393,7 @@ function common::get_airflow_version_specification() { function common::override_pip_version_if_needed() { if [[ -n ${AIRFLOW_VERSION} ]]; then if [[ ${AIRFLOW_VERSION} =~ ^2\.0.* || ${AIRFLOW_VERSION} =~ ^1\.* ]]; then - export AIRFLOW_PIP_VERSION="23.0.1" + export AIRFLOW_PIP_VERSION="23.1" fi fi } @@ -782,6 +782,12 @@ if [[ ${SKIP_ENVIRONMENT_INITIALIZATION=} != "true" ]]; then installable_files+=( "${file}" ) fi done + if [[ ${USE_AIRFLOW_VERSION} != "wheel" && ${USE_AIRFLOW_VERSION} != "sdist" && ${USE_AIRFLOW_VERSION} != "none" ]]; then + echo + echo "${COLOR_BLUE}Also adding airflow in specified version ${USE_AIRFLOW_VERSION} to make sure it is not upgraded by >= limits${COLOR_RESET}" + echo + installable_files+=( "apache-airflow==${USE_AIRFLOW_VERSION}" ) + fi if (( ${#installable_files[@]} )); then pip install --root-user-action ignore "${installable_files[@]}" fi @@ -862,8 +868,13 @@ if [[ "${RUN_TESTS}" != "true" ]]; then fi set -u -export RESULT_LOG_FILE="/files/test_result-${TEST_TYPE/\[*\]/}-${BACKEND}.xml" -export WARNINGS_FILE="/files/warnings-${TEST_TYPE/\[*\]/}-${BACKEND}.txt" +if [[ ${HELM_TEST_PACKAGE=} != "" ]]; then + export RESULT_LOG_FILE="/files/test_result-${TEST_TYPE/\[*\]/}-${HELM_TEST_PACKAGE}-${BACKEND}.xml" + export WARNINGS_FILE="/files/warnings-${TEST_TYPE/\[*\]/}-${HELM_TEST_PACKAGE}-${BACKEND}.txt" +else + export RESULT_LOG_FILE="/files/test_result-${TEST_TYPE/\[*\]/}-${BACKEND}.xml" + export WARNINGS_FILE="/files/warnings-${TEST_TYPE/\[*\]/}-${BACKEND}.txt" +fi EXTRA_PYTEST_ARGS=( "--verbosity=0" @@ -948,7 +959,10 @@ declare -a SELECTED_TESTS CLI_TESTS API_TESTS PROVIDERS_TESTS CORE_TESTS WWW_TES function find_all_other_tests() { local all_tests_dirs - all_tests_dirs=$(find "tests" -type d ! 
-name '__pycache__') + # The output of the find command should be sorted to make sure that the order is always the same + # when we run the tests, to avoid cross-package side effects causing different test results + # in different environments. See https://github.com/apache/airflow/pull/30588 for example. + all_tests_dirs=$(find "tests" -type d ! -name '__pycache__' | sort) all_tests_dirs=$(echo "${all_tests_dirs}" | sed "/tests$/d" ) all_tests_dirs=$(echo "${all_tests_dirs}" | sed "/tests\/dags/d" ) local path @@ -1016,7 +1030,11 @@ else elif [[ ${TEST_TYPE:=""} == "WWW" ]]; then SELECTED_TESTS=("${WWW_TESTS[@]}") elif [[ ${TEST_TYPE:=""} == "Helm" ]]; then - SELECTED_TESTS=("${HELM_CHART_TESTS[@]}") + if [[ ${HELM_TEST_PACKAGE=} != "" ]]; then + SELECTED_TESTS=("tests/charts/${HELM_TEST_PACKAGE}") + else + SELECTED_TESTS=("${HELM_CHART_TESTS[@]}") + fi elif [[ ${TEST_TYPE:=""} == "Integration" ]]; then if [[ ${SKIP_PROVIDER_TESTS:=""} == "true" ]]; then SELECTED_TESTS=("${NO_PROVIDERS_INTEGRATION_TESTS[@]}") @@ -1240,7 +1258,7 @@ ARG AIRFLOW_CI_BUILD_EPOCH="4"0 ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="true" # By default in the image, we are installing all providers when installing from sources ARG INSTALL_PROVIDERS_FROM_SOURCES="true" -ARG AIRFLOW_PIP_VERSION=23.0.1 +ARG AIRFLOW_PIP_VERSION=23.1 # Setup PIP # By default PIP install run without cache to make image smaller ARG PIP_NO_CACHE_DIR="true" diff --git a/IMAGES.rst b/IMAGES.rst index 88f8c9ca9a374..d54d840b001e5 100644 --- a/IMAGES.rst +++ b/IMAGES.rst @@ -457,7 +457,7 @@ The following build arguments (``--build-arg`` in docker build command) can be u | ``ADDITIONAL_DEV_APT_ENV`` | | Additional env variables defined | | | | when installing dev deps | +------------------------------------------+------------------------------------------+------------------------------------------+ -| ``AIRFLOW_PIP_VERSION`` | ``23.0.1`` | PIP version used. | +| ``AIRFLOW_PIP_VERSION`` | ``23.1`` | PIP version used. 
| +------------------------------------------+------------------------------------------+------------------------------------------+ | ``PIP_PROGRESS_BAR`` | ``on`` | Progress bar for PIP installation | +------------------------------------------+------------------------------------------+------------------------------------------+ diff --git a/INSTALL b/INSTALL index 160758778c602..067e490efd370 100644 --- a/INSTALL +++ b/INSTALL @@ -103,10 +103,10 @@ devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, druid, elastics facebook, ftp, gcp, gcp_api, github, github_enterprise, google, google_auth, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, neo4j, odbc, openfaas, -opsgenie, oracle, otel, pagerduty, pandas, papermill, password, pinot, plexus, postgres, presto, -qds, qubole, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, sftp, singularity, -slack, smtp, snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, -virtualenv, webhdfs, winrm, yandex, zendesk +openlineage, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, pinot, plexus, +postgres, presto, qds, qubole, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, +sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, +trino, vertica, virtualenv, webhdfs, winrm, yandex, zendesk # END EXTRAS HERE # For installing Airflow in development environments - see CONTRIBUTING.rst diff --git a/PROVIDERS.rst b/PROVIDERS.rst new file mode 100644 index 0000000000000..04723174db7c5 --- /dev/null +++ b/PROVIDERS.rst @@ -0,0 +1,253 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +************************ +Apache Airflow Providers +************************ + +.. contents:: :local: + +What is a provider? +=================== + +Airflow 2.0 introduced the concept of providers. Providers are packages that contain integrations with +external systems. They are meant to extend capabilities of the core "Apache Airflow". Thus they are +part of the vision of Airflow-as-a-Platform - where the Airflow Core provides basic data-workflow scheduling +and management capabilities and can be extended by implementing Open APIs Airflow supports, adding +Plugins that can add new features to the Core, and adding Providers that allow to interact with external +systems. + +The providers are released separately from the core Airflow and they are versioned independently. The +ways how providers can extend the Airflow Core, including the types of providers, can be found at the +`Providers page `_. You can also find +out there, how you can create your own provider. 
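Illustrative aside, not part of the patch: the paragraph above describes providers as packages that extend the Airflow Core. A minimal sketch of the metadata hook such a package exposes is shown below; the package and service names are hypothetical.

.. code-block:: python

    # Hypothetical provider package (names are made up for illustration).
    # A provider advertises itself to Airflow through an "apache_airflow_provider"
    # entry point that returns a metadata dictionary like this one.
    def get_provider_info():
        return {
            "package-name": "acme-airflow-provider-frobnicator",
            "name": "Frobnicator",
            "description": "Hooks and operators for the hypothetical Frobnicator service.",
            "versions": ["1.0.0"],
        }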
+ +Providers can be maintained and released by the Airflow community or by 3rd-party teams. In any case - +whether community-managed, or 3rd-party managed - they are released independently of the Airflow Core package. + +When the community releases the Airflow Core, it is released together with constraints; those constraints use +the latest released versions of providers, and our published convenience images contain a subset of the most +popular community providers. However, our users are free to upgrade and downgrade providers independently of +the Airflow Core version as they see fit, as long as it does not cause conflicting dependencies. + +You can read more about it in the +`Installation and upgrade scenarios `_ +chapter of our user documentation. + +Community managed providers +=========================== + +When providers are accepted by the community, the process of managing and releasing them must follow the +Apache Software Foundation rules and policies. This applies especially to accepting contributions and +releasing new versions of the providers. This means that the code changes in the providers must be +reviewed by Airflow committers and merged when they are accepted by them. We must also have sufficient +test coverage and documentation that allow us to maintain the providers, and our users to use them. + +The providers - their latest version in the "main" branch of the airflow repository - are installed and tested together +with other community providers, and one of the key properties of the community providers is that the latest +versions of providers contribute their dependencies to the constraints of Airflow, published when Airflow Core is +released. This means that when users are using constraints published by Airflow, they can install all +the providers together and they are less likely to interfere with each other; in particular, they should +be able to be installed together without conflicting dependencies. This allows us to add an optional +"extra" to Airflow for each provider, so that the providers can be installed together with Airflow by +specifying the "extra" in the installation command. + +Because of the constraints and potential dependency conflicts, the community providers have to be regularly +updated, and the community might decide to suspend releases of a provider if we find out that we have trouble +with updating the dependencies, or if we find out that the provider is not compatible with other, more +popular providers and the popular providers are limited by the constraints of the less popular ones. +See the section below for more details on suspending releases of the community providers. + +The list of all available community providers is available at the `Providers index `_. + +Accepting new community providers +================================= + +Accepting new community providers should be a deliberate process that requires a ``[DISCUSSION]`` thread +followed by a ``[VOTE]`` thread at the airflow `devlist `_. + +In case the provider is an integration with open-source software rather than a service, we can relax the vote +procedure a bit. Particularly, if the open-source software comes from the Apache Software Foundation, +Linux Software Foundation or a similar organisation with well established governance processes that are not +strictly vendor-controlled, and when the software is well established and popular, it might be enough to +have a good and complete PR of the provider, ideally with great test coverage, including integration tests, +and documentation.
Then it should be enough to request acceptance of the provider via a ``[LAZY CONSENSUS]`` mail +on the devlist and, assuming such lazy consensus is not objected to by anyone in the community, the provider +might be merged. + +For service providers, the ``[DISCUSSION]`` thread is aimed at gathering information about the reasons why +the person who proposes the new provider thinks it should be accepted by the community. Maintaining the provider +in the community is a burden. Contrary to many people's beliefs, code is often a liability rather than an asset, +and accepting code to be managed by the community, especially when it involves significant maintenance effort, +is often undesirable, especially as the community consists of volunteers. There must be a really +good reason why we would believe that the provider is better maintained by the community if there +are 3rd-party teams that can be paid to manage it on their own. In order to accept the provider, we have to +believe that managing it is in the current community interest and that enough volunteers in the community will be +willing to maintain it in the future. + +The ``[VOTE]`` thread is aimed at gathering votes from the community on whether the provider should be accepted +or not, and it follows the usual Apache Software Foundation voting rules concerning +`Votes on Code Modification `_. + +The Ecosystem page, registries, and the 3rd-party teams' own resources are the best places to increase +visibility that such providers exist, so there is no "great" visibility gained by getting the provider into +the community. It is also often easier for the service providers themselves to advertise and promote usage of the provider +when they own, manage and release their provider, especially since they can synchronize releases +of their provider with new features added to the service. + +Minimum supported version of Airflow for Community managed providers +==================================================================== + +One of the important limitations of the Providers released by the community is that we introduce the limit +of a minimum supported version of Airflow. The minimum version of Airflow is the ``MINOR`` version (2.4, 2.5 etc.) +indicating that the providers might use features that appeared in this release. The default support timespan +for the minimum version of Airflow (there could be justified exceptions) is that we increase the minimum +Airflow version to the next MINOR release, once 12 months have passed since the first release for that +MINOR version of Airflow. + +For example, this means that by default we upgrade the minimum version of Airflow supported by providers +to 2.4.0 in the first Provider's release after 30th of April 2023. The 30th of April 2022 is the date when the +first ``PATCHLEVEL`` of 2.3 (2.3.0) was released. + +When we increase the minimum Airflow version, this is not a reason to bump the ``MAJOR`` version of the providers +(unless there are other breaking changes in the provider). The reason for that is that people who use +older versions of Airflow will not be able to use that provider (so it is not a breaking change for them), +and for people who are using a supported version of Airflow this is not a breaking change on its own - they +will be able to use the new version without breaking their workflows. When we upgraded the min-version to +2.2+, our approach was different, but as of the 2.3+ upgrade (November 2022) we only bump the ``MINOR`` version of the +provider when we increase the minimum Airflow version.
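Illustrative aside, not part of the patch: a small sketch of the default bump schedule described above, using the 2.3/2.4 dates from the example in the text; it assumes ``python-dateutil`` is available.

.. code-block:: python

    # Compute the earliest date at which providers may require Airflow 2.4.0,
    # following the "12 months after the first 2.3 release" default described above.
    from datetime import date

    from dateutil.relativedelta import relativedelta  # assumed to be installed

    airflow_2_3_0_released = date(2022, 4, 30)  # first PATCHLEVEL of 2.3
    support_window = relativedelta(months=12)   # default support timespan

    earliest_min_version_bump = airflow_2_3_0_released + support_window
    # 2023-04-30: the first provider release after this date may require Airflow 2.4.0
    print(earliest_min_version_bump)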
+ +Increasing the minimum version of the Providers is one of the reasons why 3rd-party provider maintainers +might want to maintain their own providers - as they can decide to support older versions of Airflow. + +3rd-party providers +=================== + +Providers can (and it is recommended for 3rd-party providers) also be maintained and released by 3rd parties. + +There is no difference between the community and 3rd party providers - they all have the same capabilities +and limitations. The consensus in the Airflow community is that it is usually better for the community and +for the health of the provider if it is managed by the 3rd party team, rather than by the Airflow community. +This is especially the case when the provider concerns a 3rd-party service that has a team that can manage the provider +on their own. For the Airflow community, managing and releasing a 3rd-party provider that we cannot test +and verify is a lot of effort and uncertainty, especially in cases where the external service is +live and going to evolve in the future; it is better to let the 3rd party team manage it, +as they can better keep pace with the changes in the service. + +Information about such 3rd-party providers is usually published at the +`Ecosystem: plugins and providers `_ +page of the Airflow website and we encourage the service providers to publish their providers there. You can also +find 3rd-party registries of such providers that you can use when you search for existing providers (they +are also listed on the "Ecosystem" page in the same chapter). + +While we already have - historically - a number of 3rd-party service providers managed by the community, +most of those services have dedicated teams that keep an eye on the community providers and not only take +an active part in managing them (see the mixed-governance model below), but also provide a way that we can +verify whether the provider works with the latest version of the service via dashboards that show the +status of System Tests for the provider. This allows us to have a high level of confidence that when we +release the provider it works with the latest version of the service. System Tests are part of the Airflow +code, but they are executed and verified by those 3rd party service teams. We are working with the 3rd +party service teams (who are often important stakeholders of the Apache Airflow project) to add dashboards +for the historical providers that are managed by the community, and the current set of dashboards can also be +found at the +`Ecosystem: system test dashboards `_ + +Mixed governance model +====================== + +Providers are often connected with some stakeholders that are vitally interested in maintaining backwards +compatibility in their integrations (for example cloud providers, or specific service providers). But +we are also bound by the `Apache Software Foundation release policy `_ +which describes who releases, and how to release, ASF software. The provider's governance model is something we name +``mixed governance`` - where we follow the release policies, while the burden of maintaining and testing +the cherry-picked versions is on those who commit to perform the cherry-picks and make PRs to older +branches. + +The "mixed governance" (optional, per-provider) means that: + +* The Airflow Community and release manager decide when to release those providers.
+ This is fully managed by the community and the usual release-management process following the + `Apache Software Foundation release policy `_ +* The contributors (who might or might not be direct stakeholders in the provider) will carry the burden + of cherry-picking and testing the older versions of providers. +* There is no "selection" and acceptance process to determine which version of the provider is released. + It is determined by the actions of contributors raising the PR with cherry-picked changes and it follows + the usual PR review process where maintainer approves (or not) and merges (or not) such PR. Simply + speaking - the completed action of cherry-picking and testing the older version of the provider make + it eligible to be released. Unless there is someone who volunteers and perform the cherry-picking and + testing, the provider is not released. +* Branches to raise PR against are created when a contributor commits to perform the cherry-picking + (as a comment in PR to cherry-pick for example) + +Usually, community effort is focused on the most recent version of each provider. The community approach is +that we should rather aggressively remove deprecations in "major" versions of the providers - whenever +there is an opportunity to increase major version of a provider, we attempt to remove all deprecations. +However, sometimes there is a contributor (who might or might not represent stakeholder), +willing to make their effort on cherry-picking and testing the non-breaking changes to a selected, +previous major branch of the provider. This results in releasing at most two versions of a +provider at a time: + +* potentially breaking "latest" major version +* selected past major version with non-breaking changes applied by the contributor + +Cherry-picking such changes follows the same process for releasing Airflow +patch-level releases for a previous minor Airflow version. Usually such cherry-picking is done when +there is an important bugfix and the latest version contains breaking changes that are not +coupled with the bugfix. Releasing them together in the latest version of the provider effectively couples +them, and therefore they're released separately. The cherry-picked changes have to be merged by the committer following the usual rules of the +community. + +There is no obligation to cherry-pick and release older versions of the providers. +The community continues to release such older versions of the providers for as long as there is an effort +of the contributors to perform the cherry-picks and carry-on testing of the older provider version. + +The availability of stakeholder that can manage "service-oriented" maintenance and agrees to such a +responsibility, will also drive our willingness to accept future, new providers to become community managed. + +Suspending releases for providers +================================= + +In case a provider is found to require old dependencies that are not compatible with upcoming versions of +the Apache Airflow or with newer dependencies required by other providers, the provider's release +process can be suspended. 
+ +This means: + +* The provider's status is set to "suspended" +* No new releases of the provider will be made until the problem with dependencies is solved +* Sources of the provider remain in the repository for now (in the future we might add a process to remove them) +* No new changes will be accepted for the provider (other than the ones that fix the dependencies) +* The provider will be removed from the list of Apache Airflow extras in the next Airflow release + (including a patch-level release if it is possible/easy to cherry-pick the suspension change) +* Tests of the provider will not be run on our CI (in the main branch) +* Dependencies of the provider will not be installed in our main branch CI image nor included in constraints +* We can still decide to apply security fixes to released providers - by adding fixes to the main branch + but cherry-picking, testing and releasing them in the patch-level branch of the provider, similar to the + mixed governance model described above. + +The suspension may be triggered by any committer after the following criteria are met: + +* The maintainers of dependencies of the provider are notified about the issue and are given a reasonable + time to resolve it (at least 1 week) +* Other options to resolve the issue have been exhausted and there are good reasons for upgrading + the old dependencies in question +* An explanation of why we need to suspend the provider is stated in a public discussion on the devlist, followed + by a ``[LAZY CONSENSUS]`` or ``[VOTE]`` discussion on the devlist (with the majority of the binding votes + agreeing that we should suspend the provider) + +The suspension will be lifted when the dependencies of the provider are made compatible with Apache +Airflow and with other providers - by merging a PR that removes the suspension and for which the CI build succeeds. diff --git a/README.md b/README.md index ceb079fc3f40b..a5e13d824f2b4 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,6 @@ Use Airflow to author workflows as directed acyclic graphs (DAGs) of tasks. The - [Support for Python and Kubernetes versions](#support-for-python-and-kubernetes-versions) - [Base OS support for reference Airflow images](#base-os-support-for-reference-airflow-images) - [Approach to dependencies of Airflow](#approach-to-dependencies-of-airflow) -- [Release process for Providers](#release-process-for-providers) - [Contributing](#contributing) - [Who uses Apache Airflow?](#who-uses-apache-airflow) - [Who Maintains Apache Airflow?](#who-maintains-apache-airflow) @@ -397,117 +396,17 @@ The important dependencies are: ### Approach for dependencies in Airflow Providers and extras +The main part of Airflow is the Airflow Core, but the power of Airflow also comes from a number of +providers that extend the core functionality and are released separately, even if we keep them (for now) +in the same monorepo for convenience. You can read more about the providers in the +[Providers documentation](https://airflow.apache.org/docs/apache-airflow-providers/index.html). We also +have a set of policies for maintaining and releasing community-managed providers, as well as the approach +for community vs. 3rd-party providers, described in the [providers](PROVIDERS.rst) document. + Those `extras` and `providers` dependencies are maintained in `provider.yaml` of each provider.
By default, we should not upper-bound dependencies for providers, however each provider's maintainer -might decide to add additional limits (and justify them with comment) - -## Release process for Providers - -### Minimum supported version of Airflow - -Providers released by the community (with roughly monthly cadence) have -limitation of a minimum supported version of Airflow. The minimum version of -Airflow is the `MINOR` version (2.2, 2.3 etc.) indicating that the providers -might use features that appeared in this release. The default support timespan -for the minimum version of Airflow (there could be justified exceptions) is -that we increase the minimum Airflow version, when 12 months passed since the -first release for the MINOR version of Airflow. - -For example this means that by default we upgrade the minimum version of Airflow supported by providers -to 2.4.0 in the first Provider's release after 30th of April 2023. The 30th of April 2022 is the date when the -first `PATCHLEVEL` of 2.3 (2.3.0) has been released. - -When we increase the minimum Airflow version, this is not a reason to bump `MAJOR` version of the providers -(unless there are other breaking changes in the provider). The reason for that is that people who use -older version of Airflow will not be able to use that provider (so it is not a breaking change for them) -and for people who are using supported version of Airflow this is not a breaking change on its own - they -will be able to use the new version without breaking their workflows. When we upgraded min-version to -2.2+, our approach was different but as of 2.3+ upgrade (November 2022) we only bump `MINOR` version of the -provider when we increase minimum Airflow version. - -### Mixed governance model - -Providers are often connected with some stakeholders that are vitally interested in maintaining backwards -compatibilities in their integrations (for example cloud providers, or specific service providers). But, -we are also bound with the [Apache Software Foundation release policy](https://www.apache.org/legal/release-policy.html) -which describes who releases, and how to release the ASF software. The provider's governance model is something we name -"mixed governance" - where we follow the release policies, while the burden of maintaining and testing -the cherry-picked versions is on those who commit to perform the cherry-picks and make PRs to older -branches. - -The "mixed governance" (optional, per-provider) means that: - -* The Airflow Community and release manager decide when to release those providers. - This is fully managed by the community and the usual release-management process following the - [Apache Software Foundation release policy](https://www.apache.org/legal/release-policy.html) -* The contributors (who might or might not be direct stakeholders in the provider) will carry the burden - of cherry-picking and testing the older versions of providers. -* There is no "selection" and acceptance process to determine which version of the provider is released. - It is determined by the actions of contributors raising the PR with cherry-picked changes and it follows - the usual PR review process where maintainer approves (or not) and merges (or not) such PR. Simply - speaking - the completed action of cherry-picking and testing the older version of the provider make - it eligible to be released. Unless there is someone who volunteers and perform the cherry-picking and - testing, the provider is not released. 
-* Branches to raise PR against are created when a contributor commits to perform the cherry-picking - (as a comment in PR to cherry-pick for example) - -Usually, community effort is focused on the most recent version of each provider. The community approach is -that we should rather aggressively remove deprecations in "major" versions of the providers - whenever -there is an opportunity to increase major version of a provider, we attempt to remove all deprecations. -However, sometimes there is a contributor (who might or might not represent stakeholder), -willing to make their effort on cherry-picking and testing the non-breaking changes to a selected, -previous major branch of the provider. This results in releasing at most two versions of a -provider at a time: - -* potentially breaking "latest" major version -* selected past major version with non-breaking changes applied by the contributor - -Cherry-picking such changes follows the same process for releasing Airflow -patch-level releases for a previous minor Airflow version. Usually such cherry-picking is done when -there is an important bugfix and the latest version contains breaking changes that are not -coupled with the bugfix. Releasing them together in the latest version of the provider effectively couples -them, and therefore they're released separately. The cherry-picked changes have to be merged by the committer following the usual rules of the -community. - -There is no obligation to cherry-pick and release older versions of the providers. -The community continues to release such older versions of the providers for as long as there is an effort -of the contributors to perform the cherry-picks and carry-on testing of the older provider version. - -The availability of stakeholder that can manage "service-oriented" maintenance and agrees to such a -responsibility, will also drive our willingness to accept future, new providers to become community managed. - -### Suspending releases for providers - -In case a provider is found to require old dependencies that are not compatible with upcoming versions of -the Apache Airflow or with newer dependencies required by other providers, the provider's release -process can be suspended. - -This means: - -* The provider's status is set to "suspended" -* No new releases of the provider will be made until the problem with dependencies is solved -* Sources of the provider remain in the repository for now (in the future we might add process to remove them) -* No new changes will be accepted for the provider (other than the ones that fix the dependencies) -* The provider will be removed from the list of Apache Airflow extras in the next Airflow release - (including patch-level release if it is possible/easy to cherry-pick the suspension change) -* Tests of the provider will not be run on our CI (in main branch) -* Dependencies of the provider will not be installed in our main branch CI image nor included in constraints -* We can still decide to apply security fixes to released providers - by adding fixes to the main branch - but cherry-picking, testing and releasing them in the patch-level branch of the provider similar to the - mixed governance model described above. 
- -The suspension may be triggered by any committer after the following criteria are met: - -* The maintainers of dependencies of the provider are notified about the issue and are given a reasonable - time to resolve it (at least 1 week) -* Other options to resolve the issue have been exhausted and there are good reasons for upgrading - the old dependencies in question -* Explanation why we need to suspend the provider is stated in a public discussion in the devlist. Followed - by LAZY CONSENSUS or VOTE (with the majority of the voters agreeing that we should suspend the provider) - -The suspension will be lifted when the dependencies of the provider are made compatible with the Apache -Airflow and with other providers. +might decide to add additional limits (and justify them with comment). ## Contributing diff --git a/TESTING.rst b/TESTING.rst index a3d773799e175..a89b2d287c959 100644 --- a/TESTING.rst +++ b/TESTING.rst @@ -606,7 +606,7 @@ Example test here: .. code-block:: python - from tests.charts.helm_template_generator import render_chart, render_k8s_object + from tests.charts.common.helm_template_generator import render_chart, render_k8s_object git_sync_basic = """ dags: @@ -634,6 +634,16 @@ following command (but it takes quite a long time even in a multi-processor mach breeze testing helm-tests +You can also execute tests from a selected package only. Tests in ``tests/chart`` are grouped by packages +so rather than running all tests, you can run only tests from a selected package. For example: + +.. code-block:: bash + + breeze testing helm-tests --helm-test-package basic + +Will run all tests from ``tests/charts/basic`` package. + + You can also run Helm tests individually via the usual ``breeze`` command. Just enter breeze and run the tests with pytest as you would do with regular unit tests (you can add ``-n auto`` command to run Helm tests in parallel - unlike most of the regular unit tests of ours that require a database, the Helm tests are diff --git a/airflow/cli/cli_config.py b/airflow/cli/cli_config.py index e08b4e01ec741..ffcfd92de03b6 100644 --- a/airflow/cli/cli_config.py +++ b/airflow/cli/cli_config.py @@ -1101,6 +1101,12 @@ class GroupCommand(NamedTuple): CLICommand = Union[ActionCommand, GroupCommand] DAGS_COMMANDS = ( + ActionCommand( + name="details", + help="Get DAG details given a DAG id", + func=lazy_load_command("airflow.cli.commands.dag_command.dag_details"), + args=(ARG_DAG_ID, ARG_OUTPUT, ARG_VERBOSE), + ), ActionCommand( name="list", help="List all the DAGs", diff --git a/airflow/cli/commands/dag_command.py b/airflow/cli/commands/dag_command.py index 8735a0e522715..c81f1c0a47a45 100644 --- a/airflow/cli/commands/dag_command.py +++ b/airflow/cli/commands/dag_command.py @@ -32,6 +32,7 @@ from airflow import settings from airflow.api.client import get_current_api_client +from airflow.api_connexion.schemas.dag_schema import dag_schema from airflow.cli.simple_table import AirflowConsole from airflow.configuration import conf from airflow.exceptions import AirflowException, RemovedInAirflow3Warning @@ -346,6 +347,27 @@ def dag_list_dags(args) -> None: ) +@cli_utils.action_cli +@suppress_logs_and_warning +@provide_session +def dag_details(args, session=NEW_SESSION): + """Get DAG details given a DAG id""" + dag = DagModel.get_dagmodel(args.dag_id, session=session) + if not dag: + raise SystemExit(f"DAG: {args.dag_id} does not exist in 'dag' table") + dag_detail = dag_schema.dump(dag) + + if args.output in ["table", "plain"]: + data = [{"property_name": key, 
"property_value": value} for key, value in dag_detail.items()] + else: + data = [dag_detail] + + AirflowConsole().print_as( + data=data, + output=args.output, + ) + + @cli_utils.action_cli @suppress_logs_and_warning def dag_list_import_errors(args) -> None: diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 29e02df9b3b6b..a65012c7fa876 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -1692,6 +1692,13 @@ webserver: type: string example: ~ default: "5 per 40 second" + caching_hash_method: + description: | + The caching algorithm used by the webserver. Must be a valid hashlib function name. + version_added: 2.6.0 + type: string + example: "sha256" + default: "md5" email: description: | @@ -2047,26 +2054,6 @@ celery: type: boolean example: ~ default: "True" - task_adoption_timeout: - description: | - Time in seconds after which adopted tasks which are queued in celery are assumed to be stalled, - and are automatically rescheduled. This setting does the same thing as ``stalled_task_timeout`` but - applies specifically to adopted tasks only. When set to 0, the ``stalled_task_timeout`` setting - also applies to adopted tasks. To calculate adoption time, subtract the - :ref:`task duration` from the task's :ref:`landing time`. - version_added: 2.0.0 - type: integer - example: ~ - default: "600" - stalled_task_timeout: - description: | - Time in seconds after which tasks queued in celery are assumed to be stalled, and are automatically - rescheduled. Adopted tasks will instead use the ``task_adoption_timeout`` setting if specified. - When set to 0, automatic clearing of stalled tasks is disabled. - version_added: 2.3.1 - type: integer - example: ~ - default: "0" task_publish_max_retries: description: | The Maximum number of retries for publishing task messages to the broker when failing @@ -2344,6 +2331,16 @@ scheduler: version_added: 2.0.0 type: boolean default: "True" + parsing_pre_import_modules: + description: | + The scheduler reads dag files to extract the airflow modules that are going to be used, + and imports them ahead of time to avoid having to re-do it for each parsing process. + This flag can be set to False to disable this behavior in case an airflow module needs to be freshly + imported each time (at the cost of increased DAG parsing time). + version_added: 2.6.0 + type: boolean + example: ~ + default: "True" parsing_processes: description: | The scheduler can run multiple processes in parallel to parse dags. @@ -2363,7 +2360,6 @@ scheduler: same host. This is useful when running with Scheduler in HA mode where each scheduler can parse different DAG files. * ``alphabetical``: Sort by filename - version_added: 2.1.0 type: string example: ~ @@ -2415,6 +2411,21 @@ scheduler: type: string example: ~ default: "15" + task_queued_timeout: + description: | + Amount of time a task can be in the queued state before being retried or set to failed. + version_added: 2.6.0 + type: float + example: ~ + default: "600.0" + task_queued_timeout_check_interval: + description: | + How often to check for tasks that have been in the queued state for + longer than `[scheduler] task_queued_timeout`. 
+ version_added: 2.6.0 + type: float + example: ~ + default: "120.0" triggerer: description: ~ options: @@ -2570,6 +2581,13 @@ kubernetes_executor: previous_name: kubernetes version: 2.5.0 options: + api_client_retry_configuration: + description: | + Kwargs to override the default urllib3 Retry used in the kubernetes API client + version_added: 2.6.0 + type: string + example: '{ "total": 3, "backoff_factor": 0.5 }' + default: "" pod_template_file: description: | Path to the YAML pod file that forms the basis for KubernetesExecutor workers. @@ -2731,20 +2749,6 @@ kubernetes_executor: type: boolean example: ~ default: "True" - worker_pods_pending_timeout: - description: | - How long in seconds a worker can be in Pending before it is considered a failure - version_added: 2.1.0 - type: integer - example: ~ - default: "300" - worker_pods_pending_timeout_check_interval: - description: | - How often in seconds to check if Pending workers have exceeded their timeouts - version_added: 2.1.0 - type: integer - example: ~ - default: "120" worker_pods_queued_check_interval: description: | How often in seconds to check for task instances stuck in "queued" status without a pod @@ -2752,14 +2756,6 @@ kubernetes_executor: type: integer example: ~ default: "60" - worker_pods_pending_timeout_batch_size: - description: | - How many pending pods to check for timeout violations in each check interval. - You may want this higher if you have a very large cluster and/or use ``multi_namespace_mode``. - version_added: 2.1.0 - type: integer - example: ~ - default: "100" ssl_ca_cert: description: | Path to a CA certificate to be used by the Kubernetes client to verify the server's SSL certificate. diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg index 257536cc038fd..684a78992d6c2 100644 --- a/airflow/config_templates/default_airflow.cfg +++ b/airflow/config_templates/default_airflow.cfg @@ -856,6 +856,10 @@ auth_rate_limited = True # Rate limit for authentication endpoints. auth_rate_limit = 5 per 40 second +# The caching algorithm used by the webserver. Must be a valid hashlib function name. +# Example: caching_hash_method = sha256 +caching_hash_method = md5 + [email] # Configuration email backend and whether to @@ -1033,18 +1037,6 @@ operation_timeout = 1.0 # or run in HA mode, it can adopt the orphan tasks launched by previous SchedulerJob. task_track_started = True -# Time in seconds after which adopted tasks which are queued in celery are assumed to be stalled, -# and are automatically rescheduled. This setting does the same thing as ``stalled_task_timeout`` but -# applies specifically to adopted tasks only. When set to 0, the ``stalled_task_timeout`` setting -# also applies to adopted tasks. To calculate adoption time, subtract the -# :ref:`task duration` from the task's :ref:`landing time`. -task_adoption_timeout = 600 - -# Time in seconds after which tasks queued in celery are assumed to be stalled, and are automatically -# rescheduled. Adopted tasks will instead use the ``task_adoption_timeout`` setting if specified. -# When set to 0, automatic clearing of stalled tasks is disabled. -stalled_task_timeout = 0 - # The Maximum number of retries for publishing task messages to the broker when failing # due to ``AirflowTaskTimeout`` error before giving up and marking Task as failed. 
task_publish_max_retries = 3 @@ -1190,6 +1182,12 @@ max_dagruns_per_loop_to_schedule = 20 # dags in some circumstances schedule_after_task_execution = True +# The scheduler reads dag files to extract the airflow modules that are going to be used, +# and imports them ahead of time to avoid having to re-do it for each parsing process. +# This flag can be set to False to disable this behavior in case an airflow module needs to be freshly +# imported each time (at the cost of increased DAG parsing time). +parsing_pre_import_modules = True + # The scheduler can run multiple processes in parallel to parse dags. # This defines how many processes will run. parsing_processes = 2 @@ -1228,6 +1226,13 @@ allow_trigger_in_future = False # How often to check for expired trigger requests that have not run yet. trigger_timeout_check_interval = 15 +# Amount of time a task can be in the queued state before being retried or set to failed. +task_queued_timeout = 600.0 + +# How often to check for tasks that have been in the queued state for +# longer than `[scheduler] task_queued_timeout`. +task_queued_timeout_check_interval = 120.0 + [triggerer] # How many triggers a single Triggerer will run at once, by default. default_capacity = 1000 @@ -1287,6 +1292,10 @@ use_ssl = False verify_certs = True [kubernetes_executor] +# Kwargs to override the default urllib3 Retry used in the kubernetes API client +# Example: api_client_retry_configuration = {{ "total": 3, "backoff_factor": 0.5 }} +api_client_retry_configuration = + # Path to the YAML pod file that forms the basis for KubernetesExecutor workers. pod_template_file = @@ -1372,19 +1381,9 @@ tcp_keep_cnt = 6 # Set this to false to skip verifying SSL certificate of Kubernetes python client. verify_ssl = True -# How long in seconds a worker can be in Pending before it is considered a failure -worker_pods_pending_timeout = 300 - -# How often in seconds to check if Pending workers have exceeded their timeouts -worker_pods_pending_timeout_check_interval = 120 - # How often in seconds to check for task instances stuck in "queued" status without a pod worker_pods_queued_check_interval = 60 -# How many pending pods to check for timeout violations in each check interval. -# You may want this higher if you have a very large cluster and/or use ``multi_namespace_mode``. -worker_pods_pending_timeout_batch_size = 100 - # Path to a CA certificate to be used by the Kubernetes client to verify the server's SSL certificate. ssl_ca_cert = diff --git a/airflow/configuration.py b/airflow/configuration.py index b28d173fc17ac..fed7e5930d415 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -234,6 +234,22 @@ class AirflowConfigParser(ConfigParser): ("database", "load_default_connections"): ("core", "load_default_connections", "2.3.0"), ("database", "max_db_retries"): ("core", "max_db_retries", "2.3.0"), ("scheduler", "parsing_cleanup_interval"): ("scheduler", "deactivate_stale_dags_interval", "2.5.0"), + ("scheduler", "task_queued_timeout_check_interval"): ( + "kubernetes_executor", + "worker_pods_pending_timeout_check_interval", + "2.6.0", + ), + } + + # A mapping of new configurations to a list of old configurations for when one configuration + # deprecates more than one other deprecation. The deprecation logic for these configurations + # is defined in SchedulerJobRunner. 
+ many_to_one_deprecated_options: dict[tuple[str, str], list[tuple[str, str, str]]] = { + ("scheduler", "task_queued_timeout"): [ + ("celery", "stalled_task_timeout", "2.6.0"), + ("celery", "task_adoption_timeout", "2.6.0"), + ("kubernetes_executor", "worker_pods_pending_timeout", "2.6.0"), + ] } # A mapping of new section -> (old section, since_version). @@ -548,12 +564,10 @@ def get_mandatory_value(self, section: str, key: str, **kwargs) -> str: @overload # type: ignore[override] def get(self, section: str, key: str, fallback: str = ..., **kwargs) -> str: # type: ignore[override] - ... @overload # type: ignore[override] def get(self, section: str, key: str, **kwargs) -> str | None: # type: ignore[override] - ... def get( # type: ignore[override, misc] @@ -1070,7 +1084,7 @@ def as_dict( # This ensures the ones from config file is hidden too # if they are not provided through env, cmd and secret hidden = "< hidden >" - for (section, key) in self.sensitive_config_values: + for section, key in self.sensitive_config_values: if not config_sources.get(section): continue if config_sources[section].get(key, None): @@ -1089,7 +1103,7 @@ def _include_secrets( display_source: bool, raw: bool, ): - for (section, key) in self.sensitive_config_values: + for section, key in self.sensitive_config_values: value: str | None = self._get_secret_option_from_config_sources(config_sources, section, key) if value: if not display_sensitive: @@ -1110,7 +1124,7 @@ def _include_commands( display_source: bool, raw: bool, ): - for (section, key) in self.sensitive_config_values: + for section, key in self.sensitive_config_values: opt = self._get_cmd_option_from_config_sources(config_sources, section, key) if not opt: continue @@ -1188,7 +1202,7 @@ def _filter_by_source( :return: None, the given config_sources is filtered if necessary, otherwise untouched. """ - for (section, key) in self.sensitive_config_values: + for section, key in self.sensitive_config_values: # Don't bother if we don't have section / key if section not in config_sources or key not in config_sources[section]: continue @@ -1222,7 +1236,7 @@ def _replace_config_with_display_sources( include_cmds: bool, include_secret: bool, ): - for (source_name, config) in configs: + for source_name, config in configs: for section in config.sections(): AirflowConfigParser._replace_section_config_with_display_sources( config, @@ -1249,7 +1263,7 @@ def _deprecated_value_is_set_in_config( continue try: deprecated_section_array = config.items(section=deprecated_section, raw=True) - for (key_candidate, _) in deprecated_section_array: + for key_candidate, _ in deprecated_section_array: if key_candidate == deprecated_key: return True except NoSectionError: diff --git a/airflow/dag_processing/processor.py b/airflow/dag_processing/processor.py index f1ba08e496aa0..c6f07af0b6167 100644 --- a/airflow/dag_processing/processor.py +++ b/airflow/dag_processing/processor.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import annotations +import importlib import logging import multiprocessing import os @@ -50,6 +51,7 @@ from airflow.stats import Stats from airflow.utils import timezone from airflow.utils.email import get_email_address_list, send_email +from airflow.utils.file import iter_airflow_imports from airflow.utils.log.logging_mixin import LoggingMixin, StreamLogWriter, set_context from airflow.utils.mixins import MultiprocessingStartMethodMixin from airflow.utils.session import NEW_SESSION, provide_session @@ -187,6 +189,23 @@ def _handle_dag_file_processing(): def start(self) -> None: """Launch the process and start processing the DAG.""" + if conf.getboolean("scheduler", "parsing_pre_import_modules", fallback=True): + # Read the file to pre-import airflow modules used. + # This prevents them from being re-imported from zero in each "processing" process + # and saves CPU time and memory. + for module in iter_airflow_imports(self.file_path): + try: + importlib.import_module(module) + except Exception as e: + # only log as warning because an error here is not preventing anything from working, and + # if it's serious, it's going to be surfaced to the user when the dag is actually parsed. + self.log.warning( + "Error when trying to pre-import module '%s' found in %s: %s", + module, + self.file_path, + e, + ) + context = self._get_multiprocessing_context() _parent_channel, _child_channel = context.Pipe(duplex=False) @@ -440,9 +459,7 @@ def manage_slas(cls, dag_folder, dag_id: str, session: Session = NEW_SESSION) -> timestamp=ts, ) sla_misses.append(sla_miss) - Stats.incr( - "sla_missed", tags={"dag_id": ti.dag_id, "run_id": ti.run_id, "task_id": ti.task_id} - ) + Stats.incr("sla_missed", tags={"dag_id": ti.dag_id, "task_id": ti.task_id}) if sla_misses: session.add_all(sla_misses) session.commit() @@ -747,7 +764,7 @@ def _get_dagbag(cls, file_path: str): return DagBag(file_path, include_examples=False) except Exception: cls.logger().exception("Failed at reloading the DAG file %s", file_path) - Stats.incr("dag_file_refresh_error", 1, 1) + Stats.incr("dag_file_refresh_error", tags={"file_path": file_path}) raise @provide_session diff --git a/airflow/decorators/task_group.py b/airflow/decorators/task_group.py index 2aa714be3b6c5..f0d510193c24d 100644 --- a/airflow/decorators/task_group.py +++ b/airflow/decorators/task_group.py @@ -114,13 +114,15 @@ def _create_task_group(self, tg_factory: Callable[..., TaskGroup], *args: Any, * return task_group def override(self, **kwargs: Any) -> _TaskGroupFactory[FParams, FReturn]: - return attr.evolve(self, tg_kwargs={**self.tg_kwargs, **kwargs}) + # TODO: fixme when mypy gets compatible with new attrs + return attr.evolve(self, tg_kwargs={**self.tg_kwargs, **kwargs}) # type: ignore[arg-type] def partial(self, **kwargs: Any) -> _TaskGroupFactory[FParams, FReturn]: self._validate_arg_names("partial", kwargs) prevent_duplicates(self.partial_kwargs, kwargs, fail_reason="duplicate partial") kwargs.update(self.partial_kwargs) - return attr.evolve(self, partial_kwargs=kwargs) + # TODO: fixme when mypy gets compatible with new attrs + return attr.evolve(self, partial_kwargs=kwargs) # type: ignore[arg-type] def expand(self, **kwargs: OperatorExpandArgument) -> DAGNode: if not kwargs: diff --git a/airflow/example_dags/plugins/workday.py b/airflow/example_dags/plugins/workday.py index 92368de0ae48c..20363a69e7a4b 100644 --- a/airflow/example_dags/plugins/workday.py +++ b/airflow/example_dags/plugins/workday.py @@ -18,28 +18,37 @@ """Plugin to demonstrate 
timetable registration and accommodate example DAGs.""" from __future__ import annotations +import logging from datetime import timedelta # [START howto_timetable] -from pandas.tseries.holiday import USFederalHolidayCalendar from pendulum import UTC, Date, DateTime, Time from airflow.plugins_manager import AirflowPlugin from airflow.timetables.base import DagRunInfo, DataInterval, TimeRestriction, Timetable +log = logging.getLogger(__name__) +try: + from pandas.tseries.holiday import USFederalHolidayCalendar + + holiday_calendar = USFederalHolidayCalendar() +except ImportError: + log.warning("Could not import pandas. Holidays will not be considered.") + holiday_calendar = None + class AfterWorkdayTimetable(Timetable): def get_next_workday(self, d: DateTime, incr=1) -> DateTime: - cal = USFederalHolidayCalendar() next_start = d while True: if next_start.weekday() in (5, 6): # If next start is in the weekend go to next day next_start = next_start + incr * timedelta(days=1) continue - holidays = cal.holidays(start=next_start, end=next_start).to_pydatetime() - if next_start in holidays: # If next start is a holiday go to next day - next_start = next_start + incr * timedelta(days=1) - continue + if holiday_calendar is not None: + holidays = holiday_calendar.holidays(start=next_start, end=next_start).to_pydatetime() + if next_start in holidays: # If next start is a holiday go to next day + next_start = next_start + incr * timedelta(days=1) + continue break return next_start diff --git a/airflow/executors/base_executor.py b/airflow/executors/base_executor.py index 8b72c919f0fdf..4f9ac0c4f60f7 100644 --- a/airflow/executors/base_executor.py +++ b/airflow/executors/base_executor.py @@ -217,9 +217,19 @@ def heartbeat(self) -> None: self.log.debug("%s in queue", num_queued_tasks) self.log.debug("%s open slots", open_slots) - Stats.gauge("executor.open_slots", open_slots) - Stats.gauge("executor.queued_tasks", num_queued_tasks) - Stats.gauge("executor.running_tasks", num_running_tasks) + Stats.gauge( + "executor.open_slots", value=open_slots, tags={"status": "open", "name": self.__class__.__name__} + ) + Stats.gauge( + "executor.queued_tasks", + value=num_queued_tasks, + tags={"status": "queued", "name": self.__class__.__name__}, + ) + Stats.gauge( + "executor.running_tasks", + value=num_running_tasks, + tags={"status": "running", "name": self.__class__.__name__}, + ) self.trigger_tasks(open_slots) @@ -376,6 +386,18 @@ def terminate(self): """This method is called when the daemon receives a SIGTERM.""" raise NotImplementedError() + def cleanup_stuck_queued_tasks(self, tis: list[TaskInstance]) -> list[str]: # pragma: no cover + """ + Handle remnants of tasks that were failed because they were stuck in queued. + Tasks can get stuck in queued. If such a task is detected, it will be marked + as `UP_FOR_RETRY` if the task instance has remaining retries or marked as `FAILED` + if it doesn't. + + :param tis: List of Task Instances to clean up + :return: List of readable task instances for a warning message + """ + raise NotImplementedError() + def try_adopt_task_instances(self, tis: Sequence[TaskInstance]) -> Sequence[TaskInstance]: """ Try to adopt running task instances that have been abandoned by a SchedulerJob dying. 
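# --- Illustrative aside (not part of the patch) ---
# The new ``BaseExecutor.cleanup_stuck_queued_tasks`` hook above is what the scheduler
# calls (see the scheduler_job_runner.py hunk later in this diff) for task instances
# that have sat in QUEUED longer than ``[scheduler] task_queued_timeout``. A minimal
# sketch of how a custom executor might satisfy the contract; only the hook name,
# signature and return value come from this patch, the executor class and its
# ``_inflight`` bookkeeping are assumptions.
from __future__ import annotations

from airflow.executors.base_executor import BaseExecutor
from airflow.models.taskinstance import TaskInstance


class MyExecutor(BaseExecutor):
    def __init__(self):
        super().__init__()
        self._inflight: dict = {}  # purely illustrative executor-side bookkeeping

    def cleanup_stuck_queued_tasks(self, tis: list[TaskInstance]) -> list[str]:
        readable_tis = []
        for ti in tis:
            readable_tis.append(repr(ti))
            # Fail the TI in the executor's event buffer; the scheduler then retries it
            # if retries remain, exactly as the docstring above describes.
            self.fail(ti.key, None)
            # Drop whatever the executor still tracks for this task.
            self._inflight.pop(ti.key, None)
        return readable_tis
# --- End of aside ---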
diff --git a/airflow/executors/celery_executor.py b/airflow/executors/celery_executor.py index 63502f61ed872..0f72df2a511cf 100644 --- a/airflow/executors/celery_executor.py +++ b/airflow/executors/celery_executor.py @@ -23,7 +23,6 @@ """ from __future__ import annotations -import datetime import logging import math import operator @@ -33,7 +32,6 @@ import traceback from collections import Counter from concurrent.futures import ProcessPoolExecutor -from enum import Enum from multiprocessing import cpu_count from typing import TYPE_CHECKING, Any, Mapping, MutableMapping, Optional, Sequence, Tuple @@ -43,7 +41,6 @@ from celery.result import AsyncResult from celery.signals import import_modules as celery_import_modules from setproctitle import setproctitle -from sqlalchemy.orm.session import Session import airflow.settings as settings from airflow.config_templates.default_celery import DEFAULT_CELERY_CONFIG @@ -54,10 +51,8 @@ from airflow.utils.dag_parsing_context import _airflow_parsing_context_manager from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.net import get_hostname -from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.state import State from airflow.utils.timeout import timeout -from airflow.utils.timezone import utcnow if TYPE_CHECKING: from airflow.executors.base_executor import CommandType, EventBufferValueType, TaskTuple @@ -214,11 +209,6 @@ def on_celery_import_modules(*args, **kwargs): pass -class _CeleryPendingTaskTimeoutType(Enum): - ADOPTED = 1 - STALLED = 2 - - class CeleryExecutor(BaseExecutor): """ CeleryExecutor is recommended for production use of Airflow. @@ -244,15 +234,6 @@ def __init__(self): self._sync_parallelism = max(1, cpu_count() - 1) self.bulk_state_fetcher = BulkStateFetcher(self._sync_parallelism) self.tasks = {} - self.stalled_task_timeouts: dict[TaskInstanceKey, datetime.datetime] = {} - self.stalled_task_timeout = datetime.timedelta( - seconds=conf.getint("celery", "stalled_task_timeout", fallback=0) - ) - self.adopted_task_timeouts: dict[TaskInstanceKey, datetime.datetime] = {} - self.task_adoption_timeout = ( - datetime.timedelta(seconds=conf.getint("celery", "task_adoption_timeout", fallback=600)) - or self.stalled_task_timeout - ) self.task_publish_retries: Counter[TaskInstanceKey] = Counter() self.task_publish_max_retries = conf.getint("celery", "task_publish_max_retries", fallback=3) @@ -302,7 +283,6 @@ def _process_tasks(self, task_tuples: list[TaskTuple]) -> None: result.backend = cached_celery_backend self.running.add(key) self.tasks[key] = result - self._set_celery_pending_task_timeout(key, _CeleryPendingTaskTimeoutType.STALLED) # Store the Celery task_id in the event buffer. 
This will get "overwritten" if the task # has another event, but that is fine, because the only other events are success/failed at @@ -333,99 +313,6 @@ def sync(self) -> None: self.log.debug("No task to query celery, skipping sync") return self.update_all_task_states() - self._check_for_timedout_adopted_tasks() - self._check_for_stalled_tasks() - - def _check_for_timedout_adopted_tasks(self) -> None: - timedout_keys = self._get_timedout_ti_keys(self.adopted_task_timeouts) - if timedout_keys: - self.log.error( - "Adopted tasks were still pending after %s, assuming they never made it to celery " - "and sending back to the scheduler:\n\t%s", - self.task_adoption_timeout, - "\n\t".join(repr(x) for x in timedout_keys), - ) - self._send_stalled_tis_back_to_scheduler(timedout_keys) - - def _check_for_stalled_tasks(self) -> None: - timedout_keys = self._get_timedout_ti_keys(self.stalled_task_timeouts) - if timedout_keys: - self.log.error( - "Tasks were still pending after %s, assuming they never made it to celery " - "and sending back to the scheduler:\n\t%s", - self.stalled_task_timeout, - "\n\t".join(repr(x) for x in timedout_keys), - ) - self._send_stalled_tis_back_to_scheduler(timedout_keys) - - def _get_timedout_ti_keys( - self, task_timeouts: dict[TaskInstanceKey, datetime.datetime] - ) -> list[TaskInstanceKey]: - """ - Evaluate whether other tasks have stalled during the expected time. - - This can happen for few different reasons, - usually related to race conditions while shutting down schedulers and celery workers. - - It is, of course, always possible that these tasks are not actually - stalled - they could just be waiting in a long celery queue. - Unfortunately, there's no way for us to know for sure, so we'll just - reschedule them and let the normal scheduler loop requeue them. - """ - now = utcnow() - timedout_keys = [] - for key, stalled_after in task_timeouts.items(): - if stalled_after > now: - # Since items are stored sorted, if we get to a stalled_after - # in the future then we can stop - break - - # If the task gets updated to STARTED (which Celery does) or has - # already finished, then it will be removed from this list -- so - # the only time it's still in this list is when it a) never made it - # to celery in the first place (i.e. 
race condition somewhere in - # the dying executor), b) celery lost the task before execution - # started, or c) a really long celery queue and it just - # hasn't started yet -- better cancel it and let the scheduler - # re-queue rather than have this task risk stalling for ever - timedout_keys.append(key) - return timedout_keys - - @provide_session - def _send_stalled_tis_back_to_scheduler( - self, keys: list[TaskInstanceKey], session: Session = NEW_SESSION - ) -> None: - from airflow.models.taskinstance import TaskInstance - - try: - session.query(TaskInstance).filter( - TaskInstance.filter_for_tis(keys), - TaskInstance.state == State.QUEUED, - TaskInstance.queued_by_job_id == self.job_id, - ).update( - { - TaskInstance.state: State.SCHEDULED, - TaskInstance.queued_dttm: None, - TaskInstance.queued_by_job_id: None, - TaskInstance.external_executor_id: None, - }, - synchronize_session=False, - ) - session.commit() - except Exception: - self.log.exception("Error sending tasks back to scheduler") - session.rollback() - return - - for key in keys: - self._set_celery_pending_task_timeout(key, None) - self.running.discard(key) - celery_async_result = self.tasks.pop(key, None) - if celery_async_result: - try: - app.control.revoke(celery_async_result.task_id) - except Exception as ex: - self.log.error("Error revoking task instance %s from celery: %s", key, ex) def debug_dump(self) -> None: """Called in response to SIGUSR2 by the scheduler.""" @@ -433,16 +320,6 @@ def debug_dump(self) -> None: self.log.info( "executor.tasks (%d)\n\t%s", len(self.tasks), "\n\t".join(map(repr, self.tasks.items())) ) - self.log.info( - "executor.adopted_task_timeouts (%d)\n\t%s", - len(self.adopted_task_timeouts), - "\n\t".join(map(repr, self.adopted_task_timeouts.items())), - ) - self.log.info( - "executor.stalled_task_timeouts (%d)\n\t%s", - len(self.stalled_task_timeouts), - "\n\t".join(map(repr, self.stalled_task_timeouts.items())), - ) def update_all_task_states(self) -> None: """Updates states of the tasks.""" @@ -458,7 +335,6 @@ def update_all_task_states(self) -> None: def change_state(self, key: TaskInstanceKey, state: str, info=None) -> None: super().change_state(key, state, info) self.tasks.pop(key, None) - self._set_celery_pending_task_timeout(key, None) def update_task_state(self, key: TaskInstanceKey, state: str, info: Any) -> None: """Updates state of a single task.""" @@ -468,8 +344,7 @@ def update_task_state(self, key: TaskInstanceKey, state: str, info: Any) -> None elif state in (celery_states.FAILURE, celery_states.REVOKED): self.fail(key, info) elif state == celery_states.STARTED: - # It's now actually running, so we know it made it to celery okay! - self._set_celery_pending_task_timeout(key, None) + pass elif state == celery_states.PENDING: pass else: @@ -529,7 +404,6 @@ def try_adopt_task_instances(self, tis: Sequence[TaskInstance]) -> Sequence[Task # Set the correct elements of the state dicts, then update this # like we just queried it. - self._set_celery_pending_task_timeout(ti.key, _CeleryPendingTaskTimeoutType.ADOPTED) self.tasks[ti.key] = result self.running.add(ti.key) self.update_task_state(ti.key, state, info) @@ -543,22 +417,28 @@ def try_adopt_task_instances(self, tis: Sequence[TaskInstance]) -> Sequence[Task return not_adopted_tis - def _set_celery_pending_task_timeout( - self, key: TaskInstanceKey, timeout_type: _CeleryPendingTaskTimeoutType | None - ) -> None: + def cleanup_stuck_queued_tasks(self, tis: list[TaskInstance]) -> list[str]: """ - Set pending task timeout. 
+ Handle remnants of tasks that were failed because they were stuck in queued. + Tasks can get stuck in queued. If such a task is detected, it will be marked + as `UP_FOR_RETRY` if the task instance has remaining retries or marked as `FAILED` + if it doesn't. - We use the fact that dicts maintain insertion order, and the the timeout for a - task is always "now + delta" to maintain the property that oldest item = first to - time out. + :param tis: List of Task Instances to clean up + :return: List of readable task instances for a warning message """ - self.adopted_task_timeouts.pop(key, None) - self.stalled_task_timeouts.pop(key, None) - if timeout_type == _CeleryPendingTaskTimeoutType.ADOPTED and self.task_adoption_timeout: - self.adopted_task_timeouts[key] = utcnow() + self.task_adoption_timeout - elif timeout_type == _CeleryPendingTaskTimeoutType.STALLED and self.stalled_task_timeout: - self.stalled_task_timeouts[key] = utcnow() + self.stalled_task_timeout + readable_tis = [] + for ti in tis: + readable_tis.append(repr(ti)) + task_instance_key = ti.key + self.fail(task_instance_key, None) + celery_async_result = self.tasks.pop(task_instance_key, None) + if celery_async_result: + try: + app.control.revoke(celery_async_result.task_id) + except Exception as ex: + self.log.error("Error revoking task instance %s from celery: %s", task_instance_key, ex) + return readable_tis def fetch_celery_task_state(async_result: AsyncResult) -> tuple[str, str | ExceptionWithTraceback, Any]: diff --git a/airflow/executors/celery_kubernetes_executor.py b/airflow/executors/celery_kubernetes_executor.py index 2f6101d14e2bf..c7bb8df62a266 100644 --- a/airflow/executors/celery_kubernetes_executor.py +++ b/airflow/executors/celery_kubernetes_executor.py @@ -199,6 +199,14 @@ def try_adopt_task_instances(self, tis: Sequence[TaskInstance]) -> Sequence[Task *self.kubernetes_executor.try_adopt_task_instances(kubernetes_tis), ] + def cleanup_stuck_queued_tasks(self, tis: list[TaskInstance]) -> list[str]: + celery_tis = [ti for ti in tis if ti.queue != self.KUBERNETES_QUEUE] + kubernetes_tis = [ti for ti in tis if ti.queue == self.KUBERNETES_QUEUE] + return [ + *self.celery_executor.cleanup_stuck_queued_tasks(celery_tis), + *self.kubernetes_executor.cleanup_stuck_queued_tasks(kubernetes_tis), + ] + def end(self) -> None: """End celery and kubernetes executor.""" self.celery_executor.end() diff --git a/airflow/executors/kubernetes_executor.py b/airflow/executors/kubernetes_executor.py index bed56856c613e..8e4ac0dac0985 100644 --- a/airflow/executors/kubernetes_executor.py +++ b/airflow/executors/kubernetes_executor.py @@ -29,7 +29,6 @@ import time from collections import defaultdict from contextlib import suppress -from datetime import timedelta from queue import Empty, Queue from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Tuple @@ -47,7 +46,6 @@ from airflow.kubernetes.kube_config import KubeConfig from airflow.kubernetes.kubernetes_helper_functions import annotations_to_key, create_pod_id from airflow.kubernetes.pod_generator import PodGenerator -from airflow.utils import timezone from airflow.utils.event_scheduler import EventScheduler from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.session import NEW_SESSION, provide_session @@ -129,11 +127,22 @@ def run(self) -> None: def _pod_events(self, kube_client: client.CoreV1Api, query_kwargs: dict): watcher = watch.Watch() - - if self.namespace == ALL_NAMESPACES: - return watcher.stream(kube_client.list_pod_for_all_namespaces, 
**query_kwargs) - else: - return watcher.stream(kube_client.list_namespaced_pod, self.namespace, **query_kwargs) + try: + if self.namespace == ALL_NAMESPACES: + return watcher.stream(kube_client.list_pod_for_all_namespaces, **query_kwargs) + else: + return watcher.stream(kube_client.list_namespaced_pod, self.namespace, **query_kwargs) + except ApiException as e: + if e.status == 410: # Resource version is too old + if self.namespace == ALL_NAMESPACES: + pods = kube_client.list_pod_for_all_namespaces(watch=False) + else: + pods = kube_client.list_namespaced_pod(namespace=self.namespace, watch=False) + resource_version = pods.metadata.resource_version + query_kwargs["resource_version"] = resource_version + return self._pod_events(kube_client=kube_client, query_kwargs=query_kwargs) + else: + raise def _run( self, @@ -586,10 +595,6 @@ def start(self) -> None: scheduler_job_id=self.scheduler_job_id, ) self.event_scheduler = EventScheduler() - self.event_scheduler.call_regular_interval( - self.kube_config.worker_pods_pending_timeout_check_interval, - self._check_worker_pods_pending_timeout, - ) self.event_scheduler.call_regular_interval( self.kube_config.worker_pods_queued_check_interval, @@ -690,7 +695,6 @@ def sync(self) -> None: ) self.fail(task[0], e) except ApiException as e: - # These codes indicate something is wrong with pod definition; otherwise we assume pod # definition is ok, and that retrying may work if e.status in (400, 422): @@ -721,38 +725,6 @@ def sync(self) -> None: next_event = self.event_scheduler.run(blocking=False) self.log.debug("Next timed event is in %f", next_event) - def _check_worker_pods_pending_timeout(self): - """Check if any pending worker pods have timed out.""" - if TYPE_CHECKING: - assert self.scheduler_job_id - - timeout = self.kube_config.worker_pods_pending_timeout - self.log.debug("Looking for pending worker pods older than %d seconds", timeout) - - kwargs = { - "limit": self.kube_config.worker_pods_pending_timeout_batch_size, - "field_selector": "status.phase=Pending", - "label_selector": f"airflow-worker={self.scheduler_job_id}", - **self.kube_config.kube_client_request_args, - } - pending_pods = self._list_pods(kwargs) - - cutoff = timezone.utcnow() - timedelta(seconds=timeout) - for pod in pending_pods: - self.log.debug( - 'Found a pending pod "%s", created "%s"', pod.metadata.name, pod.metadata.creation_timestamp - ) - if pod.metadata.creation_timestamp < cutoff: - self.log.error( - ( - 'Pod "%s" has been pending for longer than %d seconds.' - "It will be deleted and set to failed." - ), - pod.metadata.name, - timeout, - ) - self.kube_scheduler.delete_pod(pod.metadata.name, pod.metadata.namespace) - def _change_state(self, key: TaskInstanceKey, state: str | None, pod_id: str, namespace: str) -> None: if TYPE_CHECKING: assert self.kube_scheduler @@ -854,6 +826,44 @@ def try_adopt_task_instances(self, tis: Sequence[TaskInstance]) -> Sequence[Task tis_to_flush.extend(pod_ids.values()) return tis_to_flush + def cleanup_stuck_queued_tasks(self, tis: list[TaskInstance]) -> list[str]: + """ + Handle remnants of tasks that were failed because they were stuck in queued. + Tasks can get stuck in queued. If such a task is detected, it will be marked + as `UP_FOR_RETRY` if the task instance has remaining retries or marked as `FAILED` + if it doesn't. 
+ + :param tis: List of Task Instances to clean up + :return: List of readable task instances for a warning message + """ + if TYPE_CHECKING: + assert self.kube_client + assert self.kube_scheduler + readable_tis = [] + for ti in tis: + selector = PodGenerator.build_selector_for_k8s_executor_pod( + dag_id=ti.dag_id, + task_id=ti.task_id, + try_number=ti.try_number, + map_index=ti.map_index, + run_id=ti.run_id, + airflow_worker=ti.queued_by_job_id, + ) + namespace = self._get_pod_namespace(ti) + pod_list = self.kube_client.list_namespaced_pod( + namespace=namespace, + label_selector=selector, + ).items + if not pod_list: + self.log.warning("Cannot find pod for ti %s", ti) + continue + elif len(pod_list) > 1: + self.log.warning("Found multiple pods for ti %s: %s", ti, pod_list) + continue + readable_tis.append(repr(ti)) + self.kube_scheduler.delete_pod(pod_id=pod_list[0].metadata.name, namespace=namespace) + return readable_tis + def adopt_launched_task( self, kube_client: client.CoreV1Api, pod: k8s.V1Pod, pod_ids: dict[TaskInstanceKey, k8s.V1Pod] ) -> None: diff --git a/airflow/executors/local_kubernetes_executor.py b/airflow/executors/local_kubernetes_executor.py index 85c61eca84df6..39ca8608bf08b 100644 --- a/airflow/executors/local_kubernetes_executor.py +++ b/airflow/executors/local_kubernetes_executor.py @@ -198,6 +198,12 @@ def try_adopt_task_instances(self, tis: Sequence[TaskInstance]) -> Sequence[Task *self.kubernetes_executor.try_adopt_task_instances(kubernetes_tis), ] + def cleanup_stuck_queued_tasks(self, tis: list[TaskInstance]) -> list[str]: + # LocalExecutor doesn't have a cleanup_stuck_queued_tasks method, so we + # will only run KubernetesExecutor's + kubernetes_tis = [ti for ti in tis if ti.queue == self.KUBERNETES_QUEUE] + return self.kubernetes_executor.cleanup_stuck_queued_tasks(kubernetes_tis) + def end(self) -> None: """End local and kubernetes executor.""" self.local_executor.end() diff --git a/airflow/jobs/backfill_job_runner.py b/airflow/jobs/backfill_job_runner.py index c99cae2d21a25..4a78890d3b557 100644 --- a/airflow/jobs/backfill_job_runner.py +++ b/airflow/jobs/backfill_job_runner.py @@ -618,7 +618,7 @@ def _per_task_process(key, ti: TaskInstance, session): "Not scheduling since DAG max_active_tasks limit is reached." ) - if task.max_active_tis_per_dag: + if task.max_active_tis_per_dag is not None: num_running_task_instances_in_task = DAG.get_num_task_instances( dag_id=self.dag_id, task_ids=[task.task_id], @@ -631,6 +631,20 @@ def _per_task_process(key, ti: TaskInstance, session): "Not scheduling since Task concurrency limit is reached." ) + if task.max_active_tis_per_dagrun is not None: + num_running_task_instances_in_task_dagrun = DAG.get_num_task_instances( + dag_id=self.dag_id, + run_id=ti.run_id, + task_ids=[task.task_id], + states=self.STATES_COUNT_AS_RUNNING, + session=session, + ) + + if num_running_task_instances_in_task_dagrun >= task.max_active_tis_per_dagrun: + raise TaskConcurrencyLimitReached( + "Not scheduling since Task concurrency per DAG run limit is reached." 
+ ) + _per_task_process(key, ti, session) session.commit() except (NoAvailablePoolSlot, DagConcurrencyLimitReached, TaskConcurrencyLimitReached) as e: diff --git a/airflow/jobs/scheduler_job_runner.py b/airflow/jobs/scheduler_job_runner.py index 2373c974f7271..0f750e30c1bb0 100644 --- a/airflow/jobs/scheduler_job_runner.py +++ b/airflow/jobs/scheduler_job_runner.py @@ -25,10 +25,11 @@ import sys import time import warnings -from collections import defaultdict +from collections import Counter +from dataclasses import dataclass from datetime import datetime, timedelta from pathlib import Path -from typing import TYPE_CHECKING, Collection, DefaultDict, Iterator +from typing import TYPE_CHECKING, Collection, Iterable, Iterator from sqlalchemy import and_, func, not_, or_, text from sqlalchemy.exc import OperationalError @@ -85,6 +86,29 @@ DM = DagModel +@dataclass +class ConcurrencyMap: + """ + Dataclass to represent concurrency maps + + It contains a map from (dag_id, task_id) to # of task instances, a map from (dag_id, task_id) + to # of task instances in the given state list and a map from (dag_id, run_id, task_id) + to # of task instances in the given state list in each DAG run. + """ + + dag_active_tasks_map: dict[str, int] + task_concurrency_map: dict[tuple[str, str], int] + task_dagrun_concurrency_map: dict[tuple[str, str, str], int] + + @classmethod + def from_concurrency_map(cls, mapping: dict[tuple[str, str, str], int]) -> ConcurrencyMap: + instance = cls(Counter(), Counter(), Counter(mapping)) + for (d, r, t), c in mapping.items(): + instance.dag_active_tasks_map[d] += c + instance.task_concurrency_map[(d, t)] += c + return instance + + def _is_parent_process() -> bool: """ Whether this is a parent process. @@ -160,6 +184,43 @@ def __init__( self._zombie_threshold_secs = conf.getint("scheduler", "scheduler_zombie_task_threshold") self._standalone_dag_processor = conf.getboolean("scheduler", "standalone_dag_processor") self._dag_stale_not_seen_duration = conf.getint("scheduler", "dag_stale_not_seen_duration") + + # Since the functionality for stalled_task_timeout, task_adoption_timeout, and + # worker_pods_pending_timeout are now handled by a single config (task_queued_timeout), + # we can't deprecate them as we normally would. So, we'll read each config and take + # the max value in order to ensure we're not undercutting a legitimate + # use of any of these configs. + stalled_task_timeout = conf.getfloat("celery", "stalled_task_timeout", fallback=0) + if stalled_task_timeout: + # TODO: Remove in Airflow 3.0 + warnings.warn( + "The '[celery] stalled_task_timeout' config option is deprecated. " + "Please update your config to use '[scheduler] task_queued_timeout' instead.", + DeprecationWarning, + ) + task_adoption_timeout = conf.getfloat("celery", "task_adoption_timeout", fallback=0) + if task_adoption_timeout: + # TODO: Remove in Airflow 3.0 + warnings.warn( + "The '[celery] task_adoption_timeout' config option is deprecated. " + "Please update your config to use '[scheduler] task_queued_timeout' instead.", + DeprecationWarning, + ) + worker_pods_pending_timeout = conf.getfloat( + "kubernetes_executor", "worker_pods_pending_timeout", fallback=0 + ) + if worker_pods_pending_timeout: + # TODO: Remove in Airflow 3.0 + warnings.warn( + "The '[kubernetes_executor] worker_pods_pending_timeout' config option is deprecated. 
" + "Please update your config to use '[scheduler] task_queued_timeout' instead.", + DeprecationWarning, + ) + task_queued_timeout = conf.getfloat("scheduler", "task_queued_timeout") + self._task_queued_timeout = max( + stalled_task_timeout, task_adoption_timeout, worker_pods_pending_timeout, task_queued_timeout + ) + self.do_pickle = do_pickle if log: @@ -231,28 +292,21 @@ def is_alive(self, grace_multiplier: float | None = None) -> bool: < scheduler_health_check_threshold ) - def __get_concurrency_maps( - self, states: list[TaskInstanceState], session: Session - ) -> tuple[DefaultDict[str, int], DefaultDict[tuple[str, str], int]]: + def __get_concurrency_maps(self, states: Iterable[TaskInstanceState], session: Session) -> ConcurrencyMap: """ Get the concurrency maps. :param states: List of states to query for - :return: A map from (dag_id, task_id) to # of task instances and - a map from (dag_id, task_id) to # of task instances in the given state list + :return: Concurrency map """ - ti_concurrency_query: list[tuple[str, str, int]] = ( - session.query(TI.task_id, TI.dag_id, func.count("*")) + ti_concurrency_query: list[tuple[str, str, str, int]] = ( + session.query(TI.task_id, TI.run_id, TI.dag_id, func.count("*")) .filter(TI.state.in_(states)) - .group_by(TI.task_id, TI.dag_id) - ).all() - dag_map: DefaultDict[str, int] = defaultdict(int) - task_map: DefaultDict[tuple[str, str], int] = defaultdict(int) - for result in ti_concurrency_query: - task_id, dag_id, count = result - dag_map[dag_id] += count - task_map[(dag_id, task_id)] = count - return dag_map, task_map + .group_by(TI.task_id, TI.run_id, TI.dag_id) + ) + return ConcurrencyMap.from_concurrency_map( + {(dag_id, run_id, task_id): count for task_id, run_id, dag_id, count in ti_concurrency_query} + ) def _executable_task_instances_to_queued(self, max_tis: int, session: Session) -> list[TI]: """ @@ -263,6 +317,8 @@ def _executable_task_instances_to_queued(self, max_tis: int, session: Session) - - DAG max_active_tasks - executor state - priority + - max active tis per DAG + - max active tis per DAG run :param max_tis: Maximum number of TIs to queue in this loop. :return: list[airflow.models.TaskInstance] @@ -304,27 +360,23 @@ def _executable_task_instances_to_queued(self, max_tis: int, session: Session) - starved_pools = {pool_name for pool_name, stats in pools.items() if stats["open"] <= 0} # dag_id to # of running tasks and (dag_id, task_id) to # of running tasks. 
- dag_active_tasks_map: DefaultDict[str, int] - task_concurrency_map: DefaultDict[tuple[str, str], int] - dag_active_tasks_map, task_concurrency_map = self.__get_concurrency_maps( - states=list(EXECUTION_STATES), session=session - ) + concurrency_map = self.__get_concurrency_maps(states=EXECUTION_STATES, session=session) - num_tasks_in_executor = 0 # Number of tasks that cannot be scheduled because of no open slot in pool num_starving_tasks_total = 0 # dag and task ids that can't be queued because of concurrency limits starved_dags: set[str] = set() starved_tasks: set[tuple[str, str]] = set() + starved_tasks_task_dagrun_concurrency: set[tuple[str, str, str]] = set() - pool_num_starving_tasks: DefaultDict[str, int] = defaultdict(int) + pool_num_starving_tasks: dict[str, int] = Counter() for loop_count in itertools.count(start=1): - num_starved_pools = len(starved_pools) num_starved_dags = len(starved_dags) num_starved_tasks = len(starved_tasks) + num_starved_tasks_task_dagrun_concurrency = len(starved_tasks_task_dagrun_concurrency) # Get task instances associated with scheduled # DagRuns which are not backfilled, in the given states, @@ -348,7 +400,14 @@ def _executable_task_instances_to_queued(self, max_tis: int, session: Session) - query = query.filter(not_(TI.dag_id.in_(starved_dags))) if starved_tasks: - task_filter = tuple_in_condition((TaskInstance.dag_id, TaskInstance.task_id), starved_tasks) + task_filter = tuple_in_condition((TI.dag_id, TI.task_id), starved_tasks) + query = query.filter(not_(task_filter)) + + if starved_tasks_task_dagrun_concurrency: + task_filter = tuple_in_condition( + (TI.dag_id, TI.run_id, TI.task_id), + starved_tasks_task_dagrun_concurrency, + ) query = query.filter(not_(task_filter)) query = query.limit(max_tis) @@ -440,7 +499,7 @@ def _executable_task_instances_to_queued(self, max_tis: int, session: Session) - # reached. 
dag_id = task_instance.dag_id - current_active_tasks_per_dag = dag_active_tasks_map[dag_id] + current_active_tasks_per_dag = concurrency_map.dag_active_tasks_map[dag_id] max_active_tasks_per_dag_limit = task_instance.dag_model.max_active_tasks self.log.info( "DAG %s has %s/%s running and queued tasks", @@ -482,7 +541,7 @@ def _executable_task_instances_to_queued(self, max_tis: int, session: Session) - ).max_active_tis_per_dag if task_concurrency_limit is not None: - current_task_concurrency = task_concurrency_map[ + current_task_concurrency = concurrency_map.task_concurrency_map[ (task_instance.dag_id, task_instance.task_id) ] @@ -495,10 +554,35 @@ def _executable_task_instances_to_queued(self, max_tis: int, session: Session) - starved_tasks.add((task_instance.dag_id, task_instance.task_id)) continue + task_dagrun_concurrency_limit: int | None = None + if serialized_dag.has_task(task_instance.task_id): + task_dagrun_concurrency_limit = serialized_dag.get_task( + task_instance.task_id + ).max_active_tis_per_dagrun + + if task_dagrun_concurrency_limit is not None: + current_task_dagrun_concurrency = concurrency_map.task_dagrun_concurrency_map[ + (task_instance.dag_id, task_instance.run_id, task_instance.task_id) + ] + + if current_task_dagrun_concurrency >= task_dagrun_concurrency_limit: + self.log.info( + "Not executing %s since the task concurrency per DAG run for" + " this task has been reached.", + task_instance, + ) + starved_tasks_task_dagrun_concurrency.add( + (task_instance.dag_id, task_instance.run_id, task_instance.task_id) + ) + continue + executable_tis.append(task_instance) open_slots -= task_instance.pool_slots - dag_active_tasks_map[dag_id] += 1 - task_concurrency_map[(task_instance.dag_id, task_instance.task_id)] += 1 + concurrency_map.dag_active_tasks_map[dag_id] += 1 + concurrency_map.task_concurrency_map[(task_instance.dag_id, task_instance.task_id)] += 1 + concurrency_map.task_dagrun_concurrency_map[ + (task_instance.dag_id, task_instance.run_id, task_instance.task_id) + ] += 1 pool_stats["open"] = open_slots @@ -508,6 +592,7 @@ def _executable_task_instances_to_queued(self, max_tis: int, session: Session) - len(starved_pools) > num_starved_pools or len(starved_dags) > num_starved_dags or len(starved_tasks) > num_starved_tasks + or len(starved_tasks_task_dagrun_concurrency) > num_starved_tasks_task_dagrun_concurrency ) if is_done or not found_new_filters: @@ -523,7 +608,6 @@ def _executable_task_instances_to_queued(self, max_tis: int, session: Session) - Stats.gauge(f"pool.starving_tasks.{pool_name}", num_starving_tasks) Stats.gauge("scheduler.tasks.starving", num_starving_tasks_total) - Stats.gauge("scheduler.tasks.running", num_tasks_in_executor) Stats.gauge("scheduler.tasks.executable", len(executable_tis)) if len(executable_tis) > 0: @@ -700,7 +784,7 @@ def _process_executor_events(self, session: Session) -> int: if ti_queued and not ti_requeued: Stats.incr( "scheduler.tasks.killed_externally", - tags={"dag_id": ti.dag_id, "run_id": ti.run_id, "task_id": ti.task_id}, + tags={"dag_id": ti.dag_id, "task_id": ti.task_id}, ) msg = ( "Executor reports task instance %s finished (%s) although the " @@ -818,13 +902,13 @@ def _update_dag_run_state_for_paused_dags(self, session: Session = NEW_SESSION) paused_runs = ( session.query(DagRun) .join(DagRun.dag_model) - .join(TaskInstance) + .join(TI) .filter( DagModel.is_paused == expression.true(), DagRun.state == DagRunState.RUNNING, DagRun.run_type != DagRunType.BACKFILL_JOB, ) - .having(DagRun.last_scheduling_decision <= 
func.max(TaskInstance.updated_at)) + .having(DagRun.last_scheduling_decision <= func.max(TI.updated_at)) .group_by(DagRun) ) for dag_run in paused_runs: @@ -885,8 +969,14 @@ def _run_scheduler_loop(self) -> None: conf.getfloat("scheduler", "zombie_detection_interval", fallback=10.0), self._find_zombies, ) + timers.call_regular_interval(60.0, self._update_dag_run_state_for_paused_dags) + timers.call_regular_interval( + conf.getfloat("scheduler", "task_queued_timeout_check_interval"), + self._fail_tasks_stuck_in_queued, + ) + timers.call_regular_interval( conf.getfloat("scheduler", "parsing_cleanup_interval"), self._orphan_unreferenced_datasets, @@ -900,7 +990,6 @@ def _run_scheduler_loop(self) -> None: for loop_count in itertools.count(start=1): with Stats.timer("scheduler.scheduler_loop_duration") as timer: - if self.using_sqlite and self.processor_agent: self.processor_agent.run_single_parsing_loop() # For the sqlite case w/ 1 thread, wait until the processor @@ -1081,13 +1170,11 @@ def _create_dag_runs(self, dag_models: Collection[DagModel], session: Session) - .all() ) - active_runs_of_dags = defaultdict( - int, + active_runs_of_dags = Counter( DagRun.active_runs_of_dags(dag_ids=(dm.dag_id for dm in dag_models), session=session), ) for dag_model in dag_models: - dag = self.dagbag.get_dag(dag_model.dag_id, session=session) if not dag: self.log.error("DAG '%s' not found in serialized_dag table", dag_model.dag_id) @@ -1167,7 +1254,6 @@ def _create_dag_runs_dataset_triggered( # instead of falling in a loop of Integrity Error. exec_date = exec_dates[dag.dag_id] if (dag.dag_id, exec_date) not in existing_dagruns: - previous_dag_run = ( session.query(DagRun) .filter( @@ -1239,8 +1325,7 @@ def _start_queued_dagruns(self, session: Session) -> None: """Find DagRuns in queued state and decide moving them to running state.""" dag_runs = self._get_next_dagruns_to_examine(DagRunState.QUEUED, session) - active_runs_of_dags = defaultdict( - int, + active_runs_of_dags = Counter( DagRun.active_runs_of_dags((dr.dag_id for dr in dag_runs), only_running=True, session=session), ) @@ -1427,6 +1512,42 @@ def _send_sla_callbacks_to_processor(self, dag: DAG) -> None: ) self.job.executor.send_callback(request) + @provide_session + def _fail_tasks_stuck_in_queued(self, session: Session = NEW_SESSION) -> None: + """ + Mark tasks stuck in queued for longer than `task_queued_timeout` as failed. + + Tasks can get stuck in queued for a wide variety of reasons (e.g. celery loses + track of a task, a cluster can't further scale up its workers, etc.), but tasks + should not be stuck in queued for a long time. This will mark tasks stuck in + queued for longer than `self._task_queued_timeout` as failed. If the task has + available retries, it will be retried. + """ + self.log.debug("Calling SchedulerJob._fail_tasks_stuck_in_queued method") + + tasks_stuck_in_queued = ( + session.query(TI) + .filter( + TI.state == State.QUEUED, + TI.queued_dttm < (timezone.utcnow() - timedelta(seconds=self._task_queued_timeout)), + TI.queued_by_job_id == self.job.id, + ) + .all() + ) + try: + tis_for_warning_message = self.job.executor.cleanup_stuck_queued_tasks(tis=tasks_stuck_in_queued) + if tis_for_warning_message: + task_instance_str = "\n\t".join(tis_for_warning_message) + self.log.warning( + "Marked the following %s task instances stuck in queued as failed. 
" + "If the task instance has available retries, it will be retried.\n\t%s", + len(tasks_stuck_in_queued), + task_instance_str, + ) + except NotImplementedError: + self.log.debug("Executor doesn't support cleanup of stuck queued tasks. Skipping.") + ... + @provide_session def _emit_pool_metrics(self, session: Session = NEW_SESSION) -> None: from airflow.models.pool import Pool @@ -1535,10 +1656,10 @@ def check_trigger_timeouts(self, session: Session = NEW_SESSION) -> None: or execution timeout has passed, so they can be marked as failed. """ num_timed_out_tasks = ( - session.query(TaskInstance) + session.query(TI) .filter( - TaskInstance.state == TaskInstanceState.DEFERRED, - TaskInstance.trigger_timeout < timezone.utcnow(), + TI.state == TaskInstanceState.DEFERRED, + TI.trigger_timeout < timezone.utcnow(), ) .update( # We have to schedule these to fail themselves so it doesn't @@ -1596,12 +1717,10 @@ def _find_zombies(self) -> None: ) self.log.error("Detected zombie job: %s", request) self.job.executor.send_callback(request) - Stats.incr( - "zombies_killed", tags={"dag_id": ti.dag_id, "run_id": ti.run_id, "task_id": ti.task_id} - ) + Stats.incr("zombies_killed", tags={"dag_id": ti.dag_id, "task_id": ti.task_id}) @staticmethod - def _generate_zombie_message_details(ti: TaskInstance): + def _generate_zombie_message_details(ti: TI): zombie_message_details = { "DAG Id": ti.dag_id, "Task Id": ti.task_id, diff --git a/airflow/kubernetes/kube_client.py b/airflow/kubernetes/kube_client.py index 7e887ae1ac4bb..eb3912db3c7fa 100644 --- a/airflow/kubernetes/kube_client.py +++ b/airflow/kubernetes/kube_client.py @@ -19,6 +19,8 @@ import logging +import urllib3.util + from airflow.configuration import conf log = logging.getLogger(__name__) @@ -107,16 +109,27 @@ def get_kube_client( if conf.getboolean("kubernetes_executor", "enable_tcp_keepalive"): _enable_tcp_keepalive() + configuration = _get_default_configuration() + api_client_retry_configuration = conf.getjson("kubernetes", "api_client_retry_configuration", fallback={}) + + if not conf.getboolean("kubernetes_executor", "verify_ssl"): + _disable_verify_ssl() + + if isinstance(api_client_retry_configuration, dict): + configuration.retries = urllib3.util.Retry(**api_client_retry_configuration) + else: + raise ValueError("api_client_retry_configuration should be a dictionary") + if in_cluster: - config.load_incluster_config() + config.load_incluster_config(client_configuration=configuration) else: if cluster_context is None: cluster_context = conf.get("kubernetes_executor", "cluster_context", fallback=None) if config_file is None: config_file = conf.get("kubernetes_executor", "config_file", fallback=None) - config.load_kube_config(config_file=config_file, context=cluster_context) - - configuration = _get_default_configuration() + config.load_kube_config( + config_file=config_file, context=cluster_context, client_configuration=configuration + ) if not conf.getboolean("kubernetes_executor", "verify_ssl"): configuration.verify_ssl = False diff --git a/airflow/kubernetes/kube_config.py b/airflow/kubernetes/kube_config.py index da2994e230298..20bddf82f38a5 100644 --- a/airflow/kubernetes/kube_config.py +++ b/airflow/kubernetes/kube_config.py @@ -71,13 +71,6 @@ def __init__(self): # interact with cluster components. 
self.executor_namespace = conf.get(self.kubernetes_section, "namespace") - self.worker_pods_pending_timeout = conf.getint(self.kubernetes_section, "worker_pods_pending_timeout") - self.worker_pods_pending_timeout_check_interval = conf.getint( - self.kubernetes_section, "worker_pods_pending_timeout_check_interval" - ) - self.worker_pods_pending_timeout_batch_size = conf.getint( - self.kubernetes_section, "worker_pods_pending_timeout_batch_size" - ) self.worker_pods_queued_check_interval = conf.getint( self.kubernetes_section, "worker_pods_queued_check_interval" ) diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index 1760bd1f420ab..37106c580f16b 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -90,6 +90,7 @@ from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.setup_teardown import SetupTeardownContext from airflow.utils.trigger_rule import TriggerRule +from airflow.utils.types import NOTSET, ArgNotSet from airflow.utils.weight_rule import WeightRule from airflow.utils.xcom import XCOM_RETURN_KEY @@ -184,6 +185,26 @@ def partial(**kwargs): return self.class_method.__get__(cls, cls) +_PARTIAL_DEFAULTS = { + "owner": DEFAULT_OWNER, + "trigger_rule": DEFAULT_TRIGGER_RULE, + "depends_on_past": False, + "ignore_first_depends_on_past": DEFAULT_IGNORE_FIRST_DEPENDS_ON_PAST, + "wait_for_past_depends_before_skipping": DEFAULT_WAIT_FOR_PAST_DEPENDS_BEFORE_SKIPPING, + "wait_for_downstream": False, + "retries": DEFAULT_RETRIES, + "queue": DEFAULT_QUEUE, + "pool_slots": DEFAULT_POOL_SLOTS, + "execution_timeout": DEFAULT_TASK_EXECUTION_TIMEOUT, + "retry_delay": DEFAULT_RETRY_DELAY, + "retry_exponential_backoff": False, + "priority_weight": DEFAULT_PRIORITY_WEIGHT, + "weight_rule": DEFAULT_WEIGHT_RULE, + "inlets": [], + "outlets": [], +} + + # This is what handles the actual mapping. 
def partial( operator_class: type[BaseOperator], @@ -191,42 +212,43 @@ def partial( task_id: str, dag: DAG | None = None, task_group: TaskGroup | None = None, - start_date: datetime | None = None, - end_date: datetime | None = None, - owner: str = DEFAULT_OWNER, - email: None | str | Iterable[str] = None, + start_date: datetime | ArgNotSet = NOTSET, + end_date: datetime | ArgNotSet = NOTSET, + owner: str | ArgNotSet = NOTSET, + email: None | str | Iterable[str] | ArgNotSet = NOTSET, params: collections.abc.MutableMapping | None = None, - resources: dict[str, Any] | None = None, - trigger_rule: str = DEFAULT_TRIGGER_RULE, - depends_on_past: bool = False, - ignore_first_depends_on_past: bool = DEFAULT_IGNORE_FIRST_DEPENDS_ON_PAST, - wait_for_past_depends_before_skipping: bool = DEFAULT_WAIT_FOR_PAST_DEPENDS_BEFORE_SKIPPING, - wait_for_downstream: bool = False, - retries: int | None = DEFAULT_RETRIES, - queue: str = DEFAULT_QUEUE, - pool: str | None = None, - pool_slots: int = DEFAULT_POOL_SLOTS, - execution_timeout: timedelta | None = DEFAULT_TASK_EXECUTION_TIMEOUT, - max_retry_delay: None | timedelta | float = None, - retry_delay: timedelta | float = DEFAULT_RETRY_DELAY, - retry_exponential_backoff: bool = False, - priority_weight: int = DEFAULT_PRIORITY_WEIGHT, - weight_rule: str = DEFAULT_WEIGHT_RULE, - sla: timedelta | None = None, - max_active_tis_per_dag: int | None = None, - on_execute_callback: None | TaskStateChangeCallback | list[TaskStateChangeCallback] = None, - on_failure_callback: None | TaskStateChangeCallback | list[TaskStateChangeCallback] = None, - on_success_callback: None | TaskStateChangeCallback | list[TaskStateChangeCallback] = None, - on_retry_callback: None | TaskStateChangeCallback | list[TaskStateChangeCallback] = None, - run_as_user: str | None = None, - executor_config: dict | None = None, - inlets: Any | None = None, - outlets: Any | None = None, - doc: str | None = None, - doc_md: str | None = None, - doc_json: str | None = None, - doc_yaml: str | None = None, - doc_rst: str | None = None, + resources: dict[str, Any] | None | ArgNotSet = NOTSET, + trigger_rule: str | ArgNotSet = NOTSET, + depends_on_past: bool | ArgNotSet = NOTSET, + ignore_first_depends_on_past: bool | ArgNotSet = NOTSET, + wait_for_past_depends_before_skipping: bool | ArgNotSet = NOTSET, + wait_for_downstream: bool | ArgNotSet = NOTSET, + retries: int | None | ArgNotSet = NOTSET, + queue: str | ArgNotSet = NOTSET, + pool: str | ArgNotSet = NOTSET, + pool_slots: int | ArgNotSet = NOTSET, + execution_timeout: timedelta | None | ArgNotSet = NOTSET, + max_retry_delay: None | timedelta | float | ArgNotSet = NOTSET, + retry_delay: timedelta | float | ArgNotSet = NOTSET, + retry_exponential_backoff: bool | ArgNotSet = NOTSET, + priority_weight: int | ArgNotSet = NOTSET, + weight_rule: str | ArgNotSet = NOTSET, + sla: timedelta | None | ArgNotSet = NOTSET, + max_active_tis_per_dag: int | None | ArgNotSet = NOTSET, + max_active_tis_per_dagrun: int | None | ArgNotSet = NOTSET, + on_execute_callback: None | TaskStateChangeCallback | list[TaskStateChangeCallback] | ArgNotSet = NOTSET, + on_failure_callback: None | TaskStateChangeCallback | list[TaskStateChangeCallback] | ArgNotSet = NOTSET, + on_success_callback: None | TaskStateChangeCallback | list[TaskStateChangeCallback] | ArgNotSet = NOTSET, + on_retry_callback: None | TaskStateChangeCallback | list[TaskStateChangeCallback] | ArgNotSet = NOTSET, + run_as_user: str | None | ArgNotSet = NOTSET, + executor_config: dict | None | ArgNotSet = NOTSET, + 
inlets: Any | None | ArgNotSet = NOTSET, + outlets: Any | None | ArgNotSet = NOTSET, + doc: str | None | ArgNotSet = NOTSET, + doc_md: str | None | ArgNotSet = NOTSET, + doc_json: str | None | ArgNotSet = NOTSET, + doc_yaml: str | None | ArgNotSet = NOTSET, + doc_rst: str | None | ArgNotSet = NOTSET, **kwargs, ) -> OperatorPartial: from airflow.models.dag import DagContext @@ -241,53 +263,62 @@ def partial( task_id = task_group.child_id(task_id) # Merge DAG and task group level defaults into user-supplied values. - partial_kwargs, partial_params = get_merged_defaults( + dag_default_args, partial_params = get_merged_defaults( dag=dag, task_group=task_group, task_params=params, task_default_args=kwargs.pop("default_args", None), ) - partial_kwargs.update(kwargs) - - # Always fully populate partial kwargs to exclude them from map(). - partial_kwargs.setdefault("dag", dag) - partial_kwargs.setdefault("task_group", task_group) - partial_kwargs.setdefault("task_id", task_id) - partial_kwargs.setdefault("start_date", start_date) - partial_kwargs.setdefault("end_date", end_date) - partial_kwargs.setdefault("owner", owner) - partial_kwargs.setdefault("email", email) - partial_kwargs.setdefault("trigger_rule", trigger_rule) - partial_kwargs.setdefault("depends_on_past", depends_on_past) - partial_kwargs.setdefault("ignore_first_depends_on_past", ignore_first_depends_on_past) - partial_kwargs.setdefault("wait_for_past_depends_before_skipping", wait_for_past_depends_before_skipping) - partial_kwargs.setdefault("wait_for_downstream", wait_for_downstream) - partial_kwargs.setdefault("retries", retries) - partial_kwargs.setdefault("queue", queue) - partial_kwargs.setdefault("pool", pool) - partial_kwargs.setdefault("pool_slots", pool_slots) - partial_kwargs.setdefault("execution_timeout", execution_timeout) - partial_kwargs.setdefault("max_retry_delay", max_retry_delay) - partial_kwargs.setdefault("retry_delay", retry_delay) - partial_kwargs.setdefault("retry_exponential_backoff", retry_exponential_backoff) - partial_kwargs.setdefault("priority_weight", priority_weight) - partial_kwargs.setdefault("weight_rule", weight_rule) - partial_kwargs.setdefault("sla", sla) - partial_kwargs.setdefault("max_active_tis_per_dag", max_active_tis_per_dag) - partial_kwargs.setdefault("on_execute_callback", on_execute_callback) - partial_kwargs.setdefault("on_failure_callback", on_failure_callback) - partial_kwargs.setdefault("on_retry_callback", on_retry_callback) - partial_kwargs.setdefault("on_success_callback", on_success_callback) - partial_kwargs.setdefault("run_as_user", run_as_user) - partial_kwargs.setdefault("executor_config", executor_config) - partial_kwargs.setdefault("inlets", inlets or []) - partial_kwargs.setdefault("outlets", outlets or []) - partial_kwargs.setdefault("resources", resources) - partial_kwargs.setdefault("doc", doc) - partial_kwargs.setdefault("doc_json", doc_json) - partial_kwargs.setdefault("doc_md", doc_md) - partial_kwargs.setdefault("doc_rst", doc_rst) - partial_kwargs.setdefault("doc_yaml", doc_yaml) + + # Create partial_kwargs from args and kwargs + partial_kwargs: dict[str, Any] = { + **kwargs, + "dag": dag, + "task_group": task_group, + "task_id": task_id, + "start_date": start_date, + "end_date": end_date, + "owner": owner, + "email": email, + "trigger_rule": trigger_rule, + "depends_on_past": depends_on_past, + "ignore_first_depends_on_past": ignore_first_depends_on_past, + "wait_for_past_depends_before_skipping": wait_for_past_depends_before_skipping, + "wait_for_downstream": 
wait_for_downstream, + "retries": retries, + "queue": queue, + "pool": pool, + "pool_slots": pool_slots, + "execution_timeout": execution_timeout, + "max_retry_delay": max_retry_delay, + "retry_delay": retry_delay, + "retry_exponential_backoff": retry_exponential_backoff, + "priority_weight": priority_weight, + "weight_rule": weight_rule, + "sla": sla, + "max_active_tis_per_dag": max_active_tis_per_dag, + "max_active_tis_per_dagrun": max_active_tis_per_dagrun, + "on_execute_callback": on_execute_callback, + "on_failure_callback": on_failure_callback, + "on_retry_callback": on_retry_callback, + "on_success_callback": on_success_callback, + "run_as_user": run_as_user, + "executor_config": executor_config, + "inlets": inlets, + "outlets": outlets, + "resources": resources, + "doc": doc, + "doc_json": doc_json, + "doc_md": doc_md, + "doc_rst": doc_rst, + "doc_yaml": doc_yaml, + } + + # Inject DAG-level default args into args provided to this function. + partial_kwargs.update((k, v) for k, v in dag_default_args.items() if partial_kwargs.get(k) is NOTSET) + + # Fill fields not provided by the user with default values. + partial_kwargs = {k: _PARTIAL_DEFAULTS.get(k) if v is NOTSET else v for k, v in partial_kwargs.items()} # Post-process arguments. Should be kept in sync with _TaskDecorator.expand(). if "task_concurrency" in kwargs: # Reject deprecated option. @@ -578,6 +609,8 @@ class derived from this one results in the creation of a task object, :param run_as_user: unix username to impersonate while running the task :param max_active_tis_per_dag: When set, a task will be able to limit the concurrent runs across execution_dates. + :param max_active_tis_per_dagrun: When set, a task will be able to limit the concurrent + task instances per DAG run. :param executor_config: Additional task-level configuration parameters that are interpreted by a specific executor. Parameters are namespaced by the name of executor. 
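# --- Illustrative aside (not part of the patch) ---
# Hedged usage example for the new ``max_active_tis_per_dagrun`` argument introduced in
# the baseoperator.py hunks here: at most two mapped task instances of ``work`` run at
# the same time within a single DAG run, while ``max_active_tis_per_dag`` continues to
# cap concurrency across runs. The DAG id, dates and commands are made up.
from datetime import datetime

from airflow import DAG
from airflow.operators.bash import BashOperator

with DAG("example_per_dagrun_limit", start_date=datetime(2023, 1, 1), schedule=None):
    BashOperator.partial(
        task_id="work",
        max_active_tis_per_dagrun=2,  # new per-DAG-run concurrency cap from this patch
    ).expand(bash_command=[f"echo {i}" for i in range(10)])
# --- End of aside ---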
@@ -729,6 +762,7 @@ def __init__( run_as_user: str | None = None, task_concurrency: int | None = None, max_active_tis_per_dag: int | None = None, + max_active_tis_per_dagrun: int | None = None, executor_config: dict | None = None, do_xcom_push: bool = True, inlets: Any | None = None, @@ -872,6 +906,7 @@ def __init__( ) max_active_tis_per_dag = task_concurrency self.max_active_tis_per_dag: int | None = max_active_tis_per_dag + self.max_active_tis_per_dagrun: int | None = max_active_tis_per_dagrun self.do_xcom_push = do_xcom_push self.doc_md = doc_md diff --git a/airflow/models/dag.py b/airflow/models/dag.py index 8b259cd3e825a..89becc04c7b10 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -1341,9 +1341,7 @@ def handle_callback(self, dagrun, success=True, reason=None, session=NEW_SESSION callback(context) except Exception: self.log.exception("failed to invoke dag state update callback") - Stats.incr( - "dag.callback_exceptions", tags={"dag_id": dagrun.dag_id, "run_id": dagrun.run_id} - ) + Stats.incr("dag.callback_exceptions", tags={"dag_id": dagrun.dag_id}) def get_active_runs(self): """ @@ -2789,7 +2787,10 @@ def bulk_write_to_db( orm_dag.description = dag.description orm_dag.max_active_tasks = dag.max_active_tasks orm_dag.max_active_runs = dag.max_active_runs - orm_dag.has_task_concurrency_limits = any(t.max_active_tis_per_dag is not None for t in dag.tasks) + orm_dag.has_task_concurrency_limits = any( + t.max_active_tis_per_dag is not None or t.max_active_tis_per_dagrun is not None + for t in dag.tasks + ) orm_dag.schedule_interval = dag.schedule_interval orm_dag.timetable_description = dag.timetable.description orm_dag.processor_subdir = processor_subdir @@ -2990,12 +2991,13 @@ def deactivate_stale_dags(expiration_date, session=NEW_SESSION): @staticmethod @provide_session - def get_num_task_instances(dag_id, task_ids=None, states=None, session=NEW_SESSION) -> int: + def get_num_task_instances(dag_id, run_id=None, task_ids=None, states=None, session=NEW_SESSION) -> int: """ Returns the number of task instances in the given DAG. :param session: ORM session :param dag_id: ID of the DAG to get the task concurrency of + :param run_id: ID of the DAG run to get the task concurrency of :param task_ids: A list of valid task IDs for the given DAG :param states: A list of states to filter by if supplied :return: The number of running tasks @@ -3003,6 +3005,10 @@ def get_num_task_instances(dag_id, task_ids=None, states=None, session=NEW_SESSI qry = session.query(func.count(TaskInstance.task_id)).filter( TaskInstance.dag_id == dag_id, ) + if run_id: + qry = qry.filter( + TaskInstance.run_id == run_id, + ) if task_ids: qry = qry.filter( TaskInstance.task_id.in_(task_ids), diff --git a/airflow/models/dagbag.py b/airflow/models/dagbag.py index f9d54cfb847f6..3d00ac973fd64 100644 --- a/airflow/models/dagbag.py +++ b/airflow/models/dagbag.py @@ -192,7 +192,8 @@ def get_dag(self, dag_id, session: Session = None): # If DAG is in the DagBag, check the following # 1. if time has come to check if DAG is updated (controlled by min_serialized_dag_fetch_secs) - # 2. check the last_updated column in SerializedDag table to see if Serialized DAG is updated + # 2. check the last_updated and hash columns in SerializedDag table to see if + # Serialized DAG is updated # 3. if (2) is yes, fetch the Serialized DAG. # 4. if (2) returns None (i.e. Serialized DAG is deleted), remove dag from dagbag # if it exists and return None. 
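# --- Illustrative aside (not part of the patch) ---
# The models/dag.py hunk earlier in this diff threads a new ``run_id`` filter through
# ``DAG.get_num_task_instances`` so callers (e.g. the backfill runner above) can count
# task instances inside a single DAG run. A hedged call example; the dag_id, run_id and
# chosen states are made up, only the signature comes from the patch.
from airflow.models.dag import DAG
from airflow.utils.state import State

running_in_this_run = DAG.get_num_task_instances(
    dag_id="example_per_dagrun_limit",
    run_id="manual__2023-01-01T00:00:00+00:00",
    task_ids=["work"],
    states=[State.RUNNING, State.QUEUED],  # roughly what STATES_COUNT_AS_RUNNING covers
)
# --- End of aside ---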
@@ -201,18 +202,24 @@ def get_dag(self, dag_id, session: Session = None): dag_id in self.dags_last_fetched and timezone.utcnow() > self.dags_last_fetched[dag_id] + min_serialized_dag_fetch_secs ): - sd_last_updated_datetime = SerializedDagModel.get_last_updated_datetime( - dag_id=dag_id, - session=session, + sd_latest_version_and_updated_datetime = ( + SerializedDagModel.get_latest_version_hash_and_updated_datetime( + dag_id=dag_id, session=session + ) ) - if not sd_last_updated_datetime: + if not sd_latest_version_and_updated_datetime: self.log.warning("Serialized DAG %s no longer exists", dag_id) del self.dags[dag_id] del self.dags_last_fetched[dag_id] del self.dags_hash[dag_id] return None - if sd_last_updated_datetime > self.dags_last_fetched[dag_id]: + sd_latest_version, sd_last_updated_datetime = sd_latest_version_and_updated_datetime + + if ( + sd_last_updated_datetime > self.dags_last_fetched[dag_id] + or sd_latest_version != self.dags_hash[dag_id] + ): self._add_dag_from_db(dag_id=dag_id, session=session) return self.dags.get(dag_id) diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index 101bda0a471a6..427cf41c88c0e 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -61,7 +61,7 @@ from airflow.ti_deps.dependencies_states import SCHEDULEABLE_STATES from airflow.typing_compat import Literal from airflow.utils import timezone -from airflow.utils.helpers import is_container +from airflow.utils.helpers import is_container, prune_dict from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.sqlalchemy import UtcDateTime, nulls_first, skip_locked, tuple_in_condition, with_row_locks @@ -233,6 +233,10 @@ def __repr__(self): external_trigger=self.external_trigger, ) + @property + def stats_tags(self) -> dict[str, str]: + return prune_dict({"dag_id": self.dag_id, "run_type": self.run_type}) + @property def logical_date(self) -> datetime: return self.execution_date @@ -553,6 +557,7 @@ def should_schedule(self) -> bool: bool(self.tis) and all(not t.task.depends_on_past for t in self.tis) and all(t.task.max_active_tis_per_dag is None for t in self.tis) + and all(t.task.max_active_tis_per_dagrun is None for t in self.tis) and all(t.state != TaskInstanceState.DEFERRED for t in self.tis) ) @@ -561,7 +566,10 @@ def recalculate(self) -> _UnfinishedStates: start_dttm = timezone.utcnow() self.last_scheduling_decision = start_dttm - with Stats.timer(f"dagrun.dependency-check.{self.dag_id}"): + with Stats.timer( + f"dagrun.dependency-check.{self.dag_id}", + tags=self.stats_tags, + ): dag = self.get_dag() info = self.task_instance_scheduling_decisions(session) @@ -893,12 +901,10 @@ def _emit_true_scheduling_delay_stats_for_finished_state(self, finished_tis: lis data_interval_end = dag.get_run_data_interval(self).end true_delay = first_start_date - data_interval_end if true_delay.total_seconds() > 0: - Stats.timing(f"dagrun.{dag.dag_id}.first_task_scheduling_delay", true_delay) Stats.timing( - "dagrun.first_task_scheduling_delay", - true_delay, - tags={"dag_id": dag.dag_id}, + f"dagrun.{dag.dag_id}.first_task_scheduling_delay", true_delay, tags=self.stats_tags ) + Stats.timing("dagrun.first_task_scheduling_delay", true_delay, tags=self.stats_tags) except Exception: self.log.warning("Failed to record first_task_scheduling_delay metric:", exc_info=True) @@ -913,12 +919,9 @@ def _emit_duration_stats_for_finished_state(self): return duration = self.end_date - self.start_date - if self.state 
== State.SUCCESS: - Stats.timing(f"dagrun.duration.success.{self.dag_id}", duration) - Stats.timing("dagrun.duration.success", duration, tags={"dag_id": self.dag_id}) - elif self.state == State.FAILED: - Stats.timing(f"dagrun.duration.failed.{self.dag_id}", duration) - Stats.timing("dagrun.duration.failed", duration, tags={"dag_id": self.dag_id}) + timer_params = {"dt": duration, "tags": self.stats_tags} + Stats.timing(f"dagrun.duration.{self.state.value}.{self.dag_id}", **timer_params) + Stats.timing(f"dagrun.duration.{self.state.value}", **timer_params) @provide_session def verify_integrity(self, *, session: Session = NEW_SESSION) -> None: @@ -981,14 +984,14 @@ def _check_for_removed_or_restored_tasks( should_restore_task = (task is not None) and ti.state == State.REMOVED if should_restore_task: self.log.info("Restoring task '%s' which was previously removed from DAG '%s'", ti, dag) - Stats.incr(f"task_restored_to_dag.{dag.dag_id}", 1, 1) + Stats.incr(f"task_restored_to_dag.{dag.dag_id}", tags=self.stats_tags) ti.state = State.NONE except AirflowException: if ti.state == State.REMOVED: pass # ti has already been removed, just ignore it elif self.state != State.RUNNING and not dag.partial: self.log.warning("Failed to get task '%s' for dag '%s'. Marking it as removed.", ti, dag) - Stats.incr(f"task_removed_from_dag.{dag.dag_id}", 1, 1) + Stats.incr(f"task_removed_from_dag.{dag.dag_id}", tags=self.stats_tags) ti.state = State.REMOVED continue @@ -1144,7 +1147,7 @@ def _create_task_instances( session.bulk_save_objects(tasks) for task_type, count in created_counts.items(): - Stats.incr(f"task_instance_created-{task_type}", count) + Stats.incr(f"task_instance_created-{task_type}", count, tags=self.stats_tags) session.flush() except IntegrityError: self.log.info( diff --git a/airflow/models/mappedoperator.py b/airflow/models/mappedoperator.py index a10fd10cdc04b..345329ef6d4a1 100644 --- a/airflow/models/mappedoperator.py +++ b/airflow/models/mappedoperator.py @@ -451,6 +451,10 @@ def sla(self) -> datetime.timedelta | None: def max_active_tis_per_dag(self) -> int | None: return self.partial_kwargs.get("max_active_tis_per_dag") + @property + def max_active_tis_per_dagrun(self) -> int | None: + return self.partial_kwargs.get("max_active_tis_per_dagrun") + @property def resources(self) -> Resources | None: return self.partial_kwargs.get("resources") diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index 6535bd36d4921..d3f49c64d3950 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -18,7 +18,6 @@ """Serialized DAG table in database.""" from __future__ import annotations -import hashlib import logging import zlib from datetime import datetime, timedelta @@ -35,6 +34,7 @@ from airflow.serialization.serialized_objects import DagDependency, SerializedDAG from airflow.settings import COMPRESS_SERIALIZED_DAGS, MIN_SERIALIZED_DAG_UPDATE_INTERVAL, json from airflow.utils import timezone +from airflow.utils.hashlib_wrapper import md5 from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.sqlalchemy import UtcDateTime @@ -102,7 +102,7 @@ def __init__(self, dag: DAG, processor_subdir: str | None = None) -> None: dag_data = SerializedDAG.to_dict(dag) dag_data_json = json.dumps(dag_data, sort_keys=True).encode("utf-8") - self.dag_hash = hashlib.md5(dag_data_json).hexdigest() + self.dag_hash = md5(dag_data_json, usedforsecurity=False).hexdigest() if COMPRESS_SERIALIZED_DAGS: self._data = None @@ -359,6 +359,24 @@ def 
get_latest_version_hash(cls, dag_id: str, session: Session = NEW_SESSION) -> """ return session.query(cls.dag_hash).filter(cls.dag_id == dag_id).scalar() + @classmethod + def get_latest_version_hash_and_updated_datetime( + cls, + dag_id: str, + *, + session: Session, + ) -> tuple[str, datetime] | None: + """ + Get the latest DAG version for a given DAG ID, as well as the date when the Serialized DAG associated + to DAG was last updated in serialized_dag table. + + :meta private: + :param dag_id: DAG ID + :param session: ORM Session + :return: A tuple of DAG Hash and last updated datetime, or None if the DAG is not found + """ + return session.query(cls.dag_hash, cls.last_updated).filter(cls.dag_id == dag_id).one_or_none() + @classmethod @provide_session def get_dag_dependencies(cls, session: Session = NEW_SESSION) -> dict[str, list[DagDependency]]: diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index c2ed7b786864c..4039f72bb7d68 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -87,6 +87,7 @@ ) from airflow.listeners.listener import get_listener_manager from airflow.models.base import Base, StringID +from airflow.models.dagbag import DagBag from airflow.models.log import Log from airflow.models.mappedoperator import MappedOperator from airflow.models.param import process_params @@ -105,7 +106,7 @@ from airflow.utils import timezone from airflow.utils.context import ConnectionAccessor, Context, VariableAccessor, context_merge from airflow.utils.email import send_email -from airflow.utils.helpers import render_template_to_string +from airflow.utils.helpers import prune_dict, render_template_to_string from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.module_loading import qualname from airflow.utils.net import get_hostname @@ -203,6 +204,7 @@ def clear_task_instances( task_id_by_key: dict[str, dict[str, dict[int, dict[int, set[str]]]]] = defaultdict( lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(set))) ) + dag_bag = DagBag(read_dags_from_db=True) for ti in tis: if ti.state == TaskInstanceState.RUNNING: if ti.job_id: @@ -211,15 +213,16 @@ def clear_task_instances( ti.state = TaskInstanceState.RESTARTING job_ids.append(ti.job_id) else: + ti_dag = dag if dag and dag.dag_id == ti.dag_id else dag_bag.get_dag(ti.dag_id, session=session) task_id = ti.task_id - if dag and dag.has_task(task_id): - task = dag.get_task(task_id) + if ti_dag and ti_dag.has_task(task_id): + task = ti_dag.get_task(task_id) ti.refresh_from_task(task) task_retries = task.retries ti.max_tries = ti.try_number + task_retries - 1 else: - # Ignore errors when updating max_tries if dag is None or - # task not found in dag since database records could be + # Ignore errors when updating max_tries if the DAG or + # task are not found since database records could be # outdated. We make max_tries the maximum value of its # original max_tries or the last attempted try number. 
ti.max_tries = max(ti.max_tries, ti.prev_attempted_tries) @@ -540,6 +543,10 @@ def __init__( # can be changed when calling 'run' self.test_mode = False + @property + def stats_tags(self) -> dict[str, str]: + return prune_dict({"dag_id": self.dag_id, "task_id": self.task_id}) + @staticmethod def insert_mapping(run_id: str, task: Operator, map_index: int) -> dict[str, Any]: """:meta private:""" @@ -1259,12 +1266,7 @@ def check_and_change_state_before_execution( self.pid = None if not ignore_all_deps and not ignore_ti_state and self.state == State.SUCCESS: - Stats.incr( - "previously_succeeded", - 1, - 1, - tags={"dag_id": self.dag_id, "run_id": self.run_id, "task_id": self.task_id}, - ) + Stats.incr("previously_succeeded", tags=self.stats_tags) if not mark_success: # Firstly find non-runnable and non-requeueable tis. @@ -1409,10 +1411,14 @@ def _run_raw_task( session.merge(self) session.commit() actual_start_date = timezone.utcnow() - Stats.incr(f"ti.start.{self.task.dag_id}.{self.task.task_id}") + Stats.incr(f"ti.start.{self.task.dag_id}.{self.task.task_id}", tags=self.stats_tags) # Initialize final state counters at zero for state in State.task_states: - Stats.incr(f"ti.finish.{self.task.dag_id}.{self.task.task_id}.{state}", count=0) + Stats.incr( + f"ti.finish.{self.task.dag_id}.{self.task.task_id}.{state}", + count=0, + tags=self.stats_tags, + ) self.task = self.task.prepare_for_execution() context = self.get_template_context(ignore_param_exceptions=False) @@ -1482,7 +1488,7 @@ def _run_raw_task( session.commit() raise finally: - Stats.incr(f"ti.finish.{self.dag_id}.{self.task_id}.{self.state}") + Stats.incr(f"ti.finish.{self.dag_id}.{self.task_id}.{self.state}", tags=self.stats_tags) # Recording SKIPPED or SUCCESS self.clear_next_method_args() @@ -1544,7 +1550,7 @@ def signal_handler(signum, frame): if not self.next_method: self.clear_xcom_data() - with Stats.timer(f"dag.{self.task.dag_id}.{self.task.task_id}.duration"): + with Stats.timer(f"dag.{self.task.dag_id}.{self.task.task_id}.duration", tags=self.stats_tags): # Set the validated/merged params on the task object. 
self.task.params = context["params"] @@ -1579,10 +1585,8 @@ def signal_handler(signum, frame): # Run post_execute callback self.task.post_execute(context=context, result=result) - Stats.incr(f"operator_successes_{self.task.task_type}", 1, 1) - Stats.incr( - "ti_successes", tags={"dag_id": self.dag_id, "run_id": self.run_id, "task_id": self.task_id} - ) + Stats.incr(f"operator_successes_{self.task.task_type}", tags=self.stats_tags) + Stats.incr("ti_successes", tags=self.stats_tags) def _run_finished_callback( self, @@ -1849,10 +1853,10 @@ def handle_failure( self.end_date = timezone.utcnow() self.set_duration() - Stats.incr(f"operator_failures_{self.operator}") - Stats.incr( - "ti_failures", tags={"dag_id": self.dag_id, "run_id": self.run_id, "task_id": self.task_id} - ) + + Stats.incr(f"operator_failures_{self.operator}", tags=self.stats_tags) + Stats.incr("ti_failures", tags=self.stats_tags) + if not test_mode: session.add(Log(State.FAILED, self)) @@ -2479,18 +2483,17 @@ def xcom_pull( return LazyXComAccess.build_from_xcom_query(query) @provide_session - def get_num_running_task_instances(self, session: Session) -> int: + def get_num_running_task_instances(self, session: Session, same_dagrun=False) -> int: """Return Number of running TIs from the DB""" # .count() is inefficient - return ( - session.query(func.count()) - .filter( - TaskInstance.dag_id == self.dag_id, - TaskInstance.task_id == self.task_id, - TaskInstance.state == State.RUNNING, - ) - .scalar() + num_running_task_instances_query = session.query(func.count()).filter( + TaskInstance.dag_id == self.dag_id, + TaskInstance.task_id == self.task_id, + TaskInstance.state == State.RUNNING, ) + if same_dagrun: + num_running_task_instances_query = num_running_task_instances_query.filter(TaskInstance.run_id == self.run_id) + return num_running_task_instances_query.scalar() def init_run_context(self, raw: bool = False) -> None: """Sets the log context.""" diff --git a/airflow/operators/python.py b/airflow/operators/python.py index 8a3fa58123451..b3744c7c85abd 100644 --- a/airflow/operators/python.py +++ b/airflow/operators/python.py @@ -26,6 +26,7 @@ import types import warnings from abc import ABCMeta, abstractmethod +from collections.abc import Container from pathlib import Path from tempfile import TemporaryDirectory from textwrap import dedent @@ -33,7 +34,12 @@ import dill -from airflow.exceptions import AirflowConfigException, AirflowException, RemovedInAirflow3Warning +from airflow.exceptions import ( + AirflowConfigException, + AirflowException, + AirflowSkipException, + RemovedInAirflow3Warning, +) from airflow.models.baseoperator import BaseOperator from airflow.models.skipmixin import SkipMixin from airflow.models.taskinstance import _CURRENT_CONTEXT @@ -466,6 +472,9 @@ class PythonVirtualenvOperator(_BasePythonVirtualenvOperator): :param expect_airflow: expect Airflow to be installed in the target environment. If true, the operator will raise warning if Airflow is not installed, and it will attempt to load Airflow macros when starting. + :param skip_on_exit_code: If python_callable exits with this exit code, leave the task + in ``skipped`` state (default: None). If set to ``None``, any non-zero + exit code will be treated as a failure.
""" template_fields: Sequence[str] = tuple({"requirements"} | set(PythonOperator.template_fields)) @@ -486,6 +495,7 @@ def __init__( templates_dict: dict | None = None, templates_exts: list[str] | None = None, expect_airflow: bool = True, + skip_on_exit_code: int | Container[int] | None = None, **kwargs, ): if ( @@ -509,6 +519,13 @@ def __init__( self.python_version = python_version self.system_site_packages = system_site_packages self.pip_install_options = pip_install_options + self.skip_on_exit_code = ( + skip_on_exit_code + if isinstance(skip_on_exit_code, Container) + else [skip_on_exit_code] + if skip_on_exit_code + else [] + ) super().__init__( python_callable=python_callable, use_dill=use_dill, @@ -544,8 +561,14 @@ def execute_callable(self): pip_install_options=self.pip_install_options, ) python_path = tmp_path / "bin" / "python" - - return self._execute_python_callable_in_subprocess(python_path, tmp_path) + try: + result = self._execute_python_callable_in_subprocess(python_path, tmp_path) + except subprocess.CalledProcessError as e: + if e.returncode in self.skip_on_exit_code: + raise AirflowSkipException(f"Process exited with code {e.returncode}. Skipping.") + else: + raise + return result def _iter_serializable_context_keys(self): yield from self.BASE_SERIALIZABLE_CONTEXT_KEYS diff --git a/airflow/operators/trigger_dagrun.py b/airflow/operators/trigger_dagrun.py index 9a84bfac97dd1..b0115636c3d2c 100644 --- a/airflow/operators/trigger_dagrun.py +++ b/airflow/operators/trigger_dagrun.py @@ -20,7 +20,7 @@ import datetime import json import time -from typing import TYPE_CHECKING, Sequence, cast +from typing import TYPE_CHECKING, Any, Sequence, cast from sqlalchemy.orm.exc import NoResultFound @@ -211,13 +211,16 @@ def execute(self, context: Context): return @provide_session - def execute_complete(self, context: Context, session: Session, **kwargs): - parsed_execution_date = context["execution_date"] + def execute_complete(self, context: Context, session: Session, event: tuple[str, dict[str, Any]]): + # This execution date is parsed from the return trigger event + provided_execution_date = event[1]["execution_dates"][0] try: dag_run = ( session.query(DagRun) - .filter(DagRun.dag_id == self.trigger_dag_id, DagRun.execution_date == parsed_execution_date) + .filter( + DagRun.dag_id == self.trigger_dag_id, DagRun.execution_date == provided_execution_date + ) .one() ) diff --git a/airflow/plugins_manager.py b/airflow/plugins_manager.py index d5fed0d3a4297..7b34ceebac722 100644 --- a/airflow/plugins_manager.py +++ b/airflow/plugins_manager.py @@ -164,7 +164,7 @@ class AirflowPlugin: # A list of timetable classes that can be used for DAG scheduling. 
timetables: list[type[Timetable]] = [] - listeners: list[ModuleType] = [] + listeners: list[ModuleType | object] = [] @classmethod def validate(cls): diff --git a/airflow/providers/amazon/aws/hooks/base_aws.py b/airflow/providers/amazon/aws/hooks/base_aws.py index 3395990fc343e..77e372e88bbc0 100644 --- a/airflow/providers/amazon/aws/hooks/base_aws.py +++ b/airflow/providers/amazon/aws/hooks/base_aws.py @@ -794,7 +794,8 @@ def test_connection(self): @cached_property def waiter_path(self) -> PathLike[str] | None: - path = Path(__file__).parents[1].joinpath(f"waiters/{self.client_type}.json").resolve() + filename = self.client_type if self.client_type else self.resource_type + path = Path(__file__).parents[1].joinpath(f"waiters/{filename}.json").resolve() return path if path.exists() else None def get_waiter(self, waiter_name: str, parameters: dict[str, str] | None = None) -> Waiter: diff --git a/airflow/providers/amazon/aws/operators/eks.py b/airflow/providers/amazon/aws/operators/eks.py index c74dd69758b21..59c5d104d082c 100644 --- a/airflow/providers/amazon/aws/operators/eks.py +++ b/airflow/providers/amazon/aws/operators/eks.py @@ -644,7 +644,6 @@ class EksPodOperator(KubernetesPodOperator): :param namespace: The namespace in which to execute the pod. (templated) :param pod_name: The unique name to give the pod. (templated) :param aws_profile: The named profile containing the credentials for the AWS CLI tool to use. - :param aws_profile: str :param region: Which AWS region the connection should use. (templated) If this is None or empty then the default boto3 behaviour is used. :param aws_conn_id: The Airflow connection used for AWS credentials. (templated) diff --git a/airflow/providers/amazon/aws/sensors/dynamodb.py b/airflow/providers/amazon/aws/sensors/dynamodb.py new file mode 100644 index 0000000000000..bcc8f5f54615a --- /dev/null +++ b/airflow/providers/amazon/aws/sensors/dynamodb.py @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from airflow.compat.functools import cached_property +from airflow.providers.amazon.aws.hooks.dynamodb import DynamoDBHook +from airflow.sensors.base import BaseSensorOperator + +if TYPE_CHECKING: + from airflow.utils.context import Context + + +class DynamoDBValueSensor(BaseSensorOperator): + """ + Waits for an attribute value to be present for an item in a DynamoDB table. 
+ + :param table_name: DynamoDB table name + :param partition_key_name: DynamoDB partition key name + :param partition_key_value: DynamoDB partition key value + :param attribute_name: DynamoDB attribute name + :param attribute_value: DynamoDB attribute value + :param sort_key_name: (optional) DynamoDB sort key name + :param sort_key_value: (optional) DynamoDB sort key value + :param aws_conn_id: aws connection to use + :param region_name: aws region to use + """ + + def __init__( + self, + table_name: str, + partition_key_name: str, + partition_key_value: str, + attribute_name: str, + attribute_value: str, + sort_key_name: str | None = None, + sort_key_value: str | None = None, + aws_conn_id: str | None = DynamoDBHook.default_conn_name, + region_name: str | None = None, + **kwargs: Any, + ): + super().__init__(**kwargs) + self.table_name = table_name + self.partition_key_name = partition_key_name + self.partition_key_value = partition_key_value + self.attribute_name = attribute_name + self.attribute_value = attribute_value + self.sort_key_name = sort_key_name + self.sort_key_value = sort_key_value + self.aws_conn_id = aws_conn_id + self.region_name = region_name + + def poke(self, context: Context) -> bool: + """Test DynamoDB item for matching attribute value""" + key = {self.partition_key_name: self.partition_key_value} + msg = ( + f"Checking table {self.table_name} for " + + f"item Partition Key: {self.partition_key_name}={self.partition_key_value}" + ) + + if self.sort_key_name and self.sort_key_value: + key = {self.partition_key_name: self.partition_key_value, self.sort_key_name: self.sort_key_value} + msg += f"\nSort Key: {self.sort_key_name}={self.sort_key_value}" + + msg += f"\nattribute: {self.attribute_name}={self.attribute_value}" + + self.log.info(msg) + table = self.hook.conn.Table(self.table_name) + self.log.info("Table: %s", table) + self.log.info("Key: %s", key) + response = table.get_item(Key=key) + try: + self.log.info("Response: %s", response) + self.log.info("Want: %s = %s", self.attribute_name, self.attribute_value) + self.log.info( + "Got: %s = %s", self.attribute_name, response["Item"][self.attribute_name] + ) + return response["Item"][self.attribute_name] == self.attribute_value + except KeyError: + return False + + @cached_property + def hook(self) -> DynamoDBHook: + """Create and return a DynamoDBHook""" + return DynamoDBHook(self.aws_conn_id, region_name=self.region_name) diff --git a/airflow/providers/amazon/aws/transfers/sql_to_s3.py b/airflow/providers/amazon/aws/transfers/sql_to_s3.py index a8b5a9cd1c53f..8cee9b6cffb15 100644 --- a/airflow/providers/amazon/aws/transfers/sql_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/sql_to_s3.py @@ -80,6 +80,7 @@ class SqlToS3Operator(BaseOperator): CA cert bundle than the one used by botocore. :param file_format: the destination file format, only string 'csv', 'json' or 'parquet' is accepted. :param pd_kwargs: arguments to include in DataFrame ``.to_parquet()``, ``.to_json()`` or ``.to_csv()``. + :param groupby_kwargs: argument to include in DataFrame ``groupby()``.
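A hedged sketch of the new groupby_kwargs option documented above; the connection id, table, bucket and key prefix are hypothetical. Each pandas group is written to its own object named <s3_key>_<group_name>, while omitting groupby_kwargs keeps the previous single-file behaviour.

    from airflow.providers.amazon.aws.transfers.sql_to_s3 import SqlToS3Operator

    # One CSV object per distinct value of the "country" column,
    # uploaded as "exports/orders_<country>" into the (hypothetical) bucket.
    export_per_country = SqlToS3Operator(
        task_id="export_per_country",
        sql_conn_id="mysql_default",
        query="SELECT country, order_id, amount FROM orders",
        s3_bucket="my-bucket",
        s3_key="exports/orders",
        file_format="csv",
        pd_kwargs={"index": False},
        groupby_kwargs={"by": "country"},
    )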
""" template_fields: Sequence[str] = ( @@ -107,6 +108,7 @@ def __init__( verify: bool | str | None = None, file_format: Literal["csv", "json", "parquet"] = "csv", pd_kwargs: dict | None = None, + groupby_kwargs: dict | None = None, **kwargs, ) -> None: super().__init__(**kwargs) @@ -119,6 +121,7 @@ def __init__( self.replace = replace self.pd_kwargs = pd_kwargs or {} self.parameters = parameters + self.groupby_kwargs = groupby_kwargs or {} if "path_or_buf" in self.pd_kwargs: raise AirflowException("The argument path_or_buf is not allowed, please remove it") @@ -170,15 +173,26 @@ def execute(self, context: Context) -> None: self._fix_dtypes(data_df, self.file_format) file_options = FILE_OPTIONS_MAP[self.file_format] - with NamedTemporaryFile(mode=file_options.mode, suffix=file_options.suffix) as tmp_file: - - self.log.info("Writing data to temp file") - getattr(data_df, file_options.function)(tmp_file.name, **self.pd_kwargs) - - self.log.info("Uploading data to S3") - s3_conn.load_file( - filename=tmp_file.name, key=self.s3_key, bucket_name=self.s3_bucket, replace=self.replace - ) + for group_name, df in self._partition_dataframe(df=data_df): + with NamedTemporaryFile(mode=file_options.mode, suffix=file_options.suffix) as tmp_file: + + self.log.info("Writing data to temp file") + getattr(df, file_options.function)(tmp_file.name, **self.pd_kwargs) + + self.log.info("Uploading data to S3") + object_key = f"{self.s3_key}_{group_name}" if group_name else self.s3_key + s3_conn.load_file( + filename=tmp_file.name, key=object_key, bucket_name=self.s3_bucket, replace=self.replace + ) + + def _partition_dataframe(self, df: DataFrame) -> Iterable[tuple[str, DataFrame]]: + """Partition dataframe using pandas groupby() method""" + if not self.groupby_kwargs: + yield "", df + else: + grouped_df = df.groupby(**self.groupby_kwargs) + for group_label in grouped_df.groups.keys(): + yield group_label, grouped_df.get_group(group_label).reset_index(drop=True) def _get_hook(self) -> DbApiHook: self.log.debug("Get connection for %s", self.sql_conn_id) diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index 05ee54482082f..a6f27228049f0 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -97,6 +97,8 @@ integrations: - integration-name: Amazon DynamoDB external-doc-url: https://aws.amazon.com/dynamodb/ logo: /integration-logos/aws/Amazon-DynamoDB_light-bg@4x.png + how-to-guide: + - /docs/apache-airflow-providers-amazon/operators/dynamodb.rst tags: [aws] - integration-name: Amazon EC2 external-doc-url: https://aws.amazon.com/ec2/ @@ -125,26 +127,26 @@ integrations: - integration-name: Amazon EMR external-doc-url: https://aws.amazon.com/emr/ how-to-guide: - - /docs/apache-airflow-providers-amazon/operators/emr.rst + - /docs/apache-airflow-providers-amazon/operators/emr/emr.rst logo: /integration-logos/aws/Amazon-EMR_light-bg@4x.png tags: [aws] - integration-name: Amazon EMR on EKS external-doc-url: https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/emr-eks.html how-to-guide: - - /docs/apache-airflow-providers-amazon/operators/emr_eks.rst + - /docs/apache-airflow-providers-amazon/operators/emr/emr_eks.rst logo: /integration-logos/aws/Amazon-EMR_light-bg@4x.png tags: [aws] - integration-name: Amazon EMR Serverless external-doc-url: https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/emr-serverless.html how-to-guide: - - /docs/apache-airflow-providers-amazon/operators/emr_serverless.rst + - 
/docs/apache-airflow-providers-amazon/operators/emr/emr_serverless.rst logo: /integration-logos/aws/Amazon-EMR_light-bg@4x.png tags: [aws] - integration-name: Amazon Glacier external-doc-url: https://aws.amazon.com/glacier/ logo: /integration-logos/aws/Amazon-S3-Glacier_light-bg@4x.png how-to-guide: - - /docs/apache-airflow-providers-amazon/operators/glacier.rst + - /docs/apache-airflow-providers-amazon/operators/s3/glacier.rst tags: [aws] - integration-name: Amazon Kinesis Data Firehose external-doc-url: https://aws.amazon.com/kinesis/data-firehose/ @@ -160,14 +162,14 @@ integrations: external-doc-url: https://aws.amazon.com/redshift/ logo: /integration-logos/aws/Amazon-Redshift_light-bg@4x.png how-to-guide: - - /docs/apache-airflow-providers-amazon/operators/redshift_sql.rst - - /docs/apache-airflow-providers-amazon/operators/redshift_cluster.rst + - /docs/apache-airflow-providers-amazon/operators/redshift/redshift_sql.rst + - /docs/apache-airflow-providers-amazon/operators/redshift/redshift_cluster.rst tags: [aws] - integration-name: Amazon Redshift Data external-doc-url: https://aws.amazon.com/redshift/ logo: /integration-logos/aws/Amazon-Redshift_light-bg@4x.png how-to-guide: - - /docs/apache-airflow-providers-amazon/operators/redshift_data.rst + - /docs/apache-airflow-providers-amazon/operators/redshift/redshift_data.rst tags: [aws] - integration-name: Amazon SageMaker external-doc-url: https://aws.amazon.com/sagemaker/ @@ -201,7 +203,7 @@ integrations: external-doc-url: https://aws.amazon.com/s3/ logo: /integration-logos/aws/Amazon-Simple-Storage-Service-S3_light-bg@4x.png how-to-guide: - - /docs/apache-airflow-providers-amazon/operators/s3.rst + - /docs/apache-airflow-providers-amazon/operators/s3/s3.rst tags: [aws] - integration-name: Amazon Systems Manager (SSM) external-doc-url: https://aws.amazon.com/systems-manager/ @@ -347,6 +349,9 @@ sensors: - integration-name: AWS Database Migration Service python-modules: - airflow.providers.amazon.aws.sensors.dms + - integration-name: Amazon DynamoDB + python-modules: + - airflow.providers.amazon.aws.sensors.dynamodb - integration-name: Amazon EC2 python-modules: - airflow.providers.amazon.aws.sensors.ec2 @@ -501,74 +506,74 @@ hooks: transfers: - source-integration-name: Amazon DynamoDB target-integration-name: Amazon Simple Storage Service (S3) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/dynamodb_to_s3.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/dynamodb_to_s3.rst python-module: airflow.providers.amazon.aws.transfers.dynamodb_to_s3 - source-integration-name: Google Cloud Storage (GCS) target-integration-name: Amazon Simple Storage Service (S3) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/gcs_to_s3.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/gcs_to_s3.rst python-module: airflow.providers.amazon.aws.transfers.gcs_to_s3 - source-integration-name: Amazon Glacier target-integration-name: Google Cloud Storage (GCS) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/glacier_to_gcs.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/glacier_to_gcs.rst python-module: airflow.providers.amazon.aws.transfers.glacier_to_gcs - source-integration-name: Google target-integration-name: Amazon Simple Storage Service (S3) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/google_api_to_s3.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/google_api_to_s3.rst python-module: 
airflow.providers.amazon.aws.transfers.google_api_to_s3 - source-integration-name: Apache Hive target-integration-name: Amazon DynamoDB - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/hive_to_dynamodb.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/hive_to_dynamodb.rst python-module: airflow.providers.amazon.aws.transfers.hive_to_dynamodb - source-integration-name: Internet Message Access Protocol (IMAP) target-integration-name: Amazon Simple Storage Service (S3) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/imap_attachment_to_s3.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/imap_attachment_to_s3.rst python-module: airflow.providers.amazon.aws.transfers.imap_attachment_to_s3 - source-integration-name: MongoDB target-integration-name: Amazon Simple Storage Service (S3) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/mongo_to_s3.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/mongo_to_s3.rst python-module: airflow.providers.amazon.aws.transfers.mongo_to_s3 - source-integration-name: Amazon Redshift target-integration-name: Amazon Simple Storage Service (S3) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/redshift_to_s3.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/redshift_to_s3.rst python-module: airflow.providers.amazon.aws.transfers.redshift_to_s3 - source-integration-name: Amazon Simple Storage Service (S3) target-integration-name: Amazon Redshift - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/s3_to_redshift.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/s3_to_redshift.rst python-module: airflow.providers.amazon.aws.transfers.s3_to_redshift - source-integration-name: Amazon Simple Storage Service (S3) target-integration-name: SSH File Transfer Protocol (SFTP) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/s3_to_sftp.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/s3_to_sftp.rst python-module: airflow.providers.amazon.aws.transfers.s3_to_sftp - source-integration-name: SSH File Transfer Protocol (SFTP) target-integration-name: Amazon Simple Storage Service (S3) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/sftp_to_s3.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/sftp_to_s3.rst python-module: airflow.providers.amazon.aws.transfers.sftp_to_s3 - source-integration-name: Amazon Simple Storage Service (S3) target-integration-name: File Transfer Protocol (FTP) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/s3_to_ftp.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/s3_to_ftp.rst python-module: airflow.providers.amazon.aws.transfers.s3_to_ftp - source-integration-name: Exasol target-integration-name: Amazon Simple Storage Service (S3) python-module: airflow.providers.amazon.aws.transfers.exasol_to_s3 - source-integration-name: File Transfer Protocol (FTP) target-integration-name: Amazon Simple Storage Service (S3) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/ftp_to_s3.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/ftp_to_s3.rst python-module: airflow.providers.amazon.aws.transfers.ftp_to_s3 - source-integration-name: Salesforce target-integration-name: Amazon Simple Storage Service (S3) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/salesforce_to_s3.rst + 
how-to-guide: /docs/apache-airflow-providers-amazon/transfer/salesforce_to_s3.rst python-module: airflow.providers.amazon.aws.transfers.salesforce_to_s3 - source-integration-name: Local target-integration-name: Amazon Simple Storage Service (S3) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/local_to_s3.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/local_to_s3.rst python-module: airflow.providers.amazon.aws.transfers.local_to_s3 - source-integration-name: Common SQL target-integration-name: Amazon Simple Storage Service (S3) - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/sql_to_s3.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/sql_to_s3.rst python-module: airflow.providers.amazon.aws.transfers.sql_to_s3 - source-integration-name: Amazon Simple Storage Service (S3) target-integration-name: Common SQL - how-to-guide: /docs/apache-airflow-providers-amazon/operators/transfer/s3_to_sql.rst + how-to-guide: /docs/apache-airflow-providers-amazon/transfer/s3_to_sql.rst python-module: airflow.providers.amazon.aws.transfers.s3_to_sql - source-integration-name: Amazon Web Services target-integration-name: Amazon Web Services diff --git a/airflow/providers/cncf/kubernetes/operators/pod.py b/airflow/providers/cncf/kubernetes/operators/pod.py index f6896547261fd..b37866c315f6b 100644 --- a/airflow/providers/cncf/kubernetes/operators/pod.py +++ b/airflow/providers/cncf/kubernetes/operators/pod.py @@ -33,7 +33,7 @@ from urllib3.exceptions import HTTPError from airflow.compat.functools import cached_property -from airflow.exceptions import AirflowException +from airflow.exceptions import AirflowException, AirflowSkipException from airflow.kubernetes import pod_generator from airflow.kubernetes.pod_generator import PodGenerator from airflow.kubernetes.secret import Secret @@ -213,6 +213,9 @@ class KubernetesPodOperator(BaseOperator): to populate the environment variables with. The contents of the target ConfigMap's Data field will represent the key-value pairs as environment variables. Extends env_from. + :param skip_exit_code: If task exits with this exit code, leave the task + in ``skipped`` state (default: None). If set to ``None``, any non-zero + exit code will be treated as a failure. :param base_container_name: The name of the base container in the pod. This container's logs will appear as part of this task's logs if get_logs is True. Defaults to None. 
If None, will consult the class variable BASE_CONTAINER_NAME (which defaults to "base") for the base @@ -288,6 +291,7 @@ def __init__( pod_runtime_info_envs: list[k8s.V1EnvVar] | None = None, termination_grace_period: int | None = None, configmaps: list[str] | None = None, + skip_exit_code: int | None = None, base_container_name: str | None = None, deferrable: bool = False, poll_interval: float = 2, @@ -357,6 +361,7 @@ def __init__( self.termination_grace_period = termination_grace_period self.pod_request_obj: k8s.V1Pod | None = None self.pod: k8s.V1Pod | None = None + self.skip_exit_code = skip_exit_code self.base_container_name = base_container_name or self.BASE_CONTAINER_NAME self.deferrable = deferrable self.poll_interval = poll_interval @@ -670,6 +675,24 @@ def cleanup(self, pod: k8s.V1Pod, remote_pod: k8s.V1Pod): error_message = get_container_termination_message(remote_pod, self.base_container_name) error_message = "\n" + error_message if error_message else "" + if self.skip_exit_code is not None: + container_statuses = ( + remote_pod.status.container_statuses if remote_pod and remote_pod.status else None + ) or [] + base_container_status = next( + (x for x in container_statuses if x.name == self.base_container_name), None + ) + exit_code = ( + base_container_status.last_state.terminated.exit_code + if base_container_status + and base_container_status.last_state + and base_container_status.last_state.terminated + else None + ) + if exit_code == self.skip_exit_code: + raise AirflowSkipException( + f"Pod {pod and pod.metadata.name} returned exit code {self.skip_exit_code}. Skipping." + ) raise AirflowException( f"Pod {pod and pod.metadata.name} returned a failure:\n{error_message}\n" f"remote_pod: {remote_pod}" diff --git a/airflow/providers/databricks/hooks/databricks.py b/airflow/providers/databricks/hooks/databricks.py index bb8a1dc88080e..9dc1df5afe675 100644 --- a/airflow/providers/databricks/hooks/databricks.py +++ b/airflow/providers/databricks/hooks/databricks.py @@ -43,6 +43,7 @@ SUBMIT_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/submit") GET_RUN_ENDPOINT = ("GET", "api/2.1/jobs/runs/get") CANCEL_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/cancel") +DELETE_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/delete") OUTPUT_RUNS_JOB_ENDPOINT = ("GET", "api/2.1/jobs/runs/get-output") INSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/install") @@ -351,6 +352,15 @@ def cancel_run(self, run_id: int) -> None: json = {"run_id": run_id} self._do_api_call(CANCEL_RUN_ENDPOINT, json) + def delete_run(self, run_id: int) -> None: + """ + Deletes a non-active run. + + :param run_id: id of the run + """ + json = {"run_id": run_id} + self._do_api_call(DELETE_RUN_ENDPOINT, json) + def restart_cluster(self, json: dict) -> None: """ Restarts the cluster. diff --git a/airflow/providers/docker/hooks/docker.py b/airflow/providers/docker/hooks/docker.py index d22d28c398a00..d67846e9ec040 100644 --- a/airflow/providers/docker/hooks/docker.py +++ b/airflow/providers/docker/hooks/docker.py @@ -87,6 +87,7 @@ def construct_tls_config( ca_cert: str | None = None, client_cert: str | None = None, client_key: str | None = None, + verify: bool = True, assert_hostname: str | bool | None = None, ssl_version: str | None = None, ) -> TLSConfig | bool: @@ -96,6 +97,7 @@ def construct_tls_config( :param ca_cert: Path to a PEM-encoded CA (Certificate Authority) certificate file. :param client_cert: Path to PEM-encoded certificate file. :param client_key: Path to PEM-encoded key file. 
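As a sketch of the skip_exit_code parameter added to KubernetesPodOperator above (the image, namespace and exit code are hypothetical): when the base container terminates with the configured code, cleanup() raises AirflowSkipException and the task is marked skipped instead of failed.

    from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator

    # Hypothetical image and namespace; exit code 42 marks the task as skipped,
    # while any other non-zero exit code still fails it.
    check_for_work = KubernetesPodOperator(
        task_id="check_for_work",
        namespace="default",
        image="alpine:3.17",
        cmds=["sh", "-c", "exit 42"],
        skip_exit_code=42,
    )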
+ :param verify: Set ``True`` to verify the validity of the provided certificate. :param assert_hostname: Hostname to match against the docker server certificate or ``False`` to disable the check. :param ssl_version: Version of SSL to use when communicating with docker daemon. @@ -106,7 +108,7 @@ def construct_tls_config( return TLSConfig( ca_cert=ca_cert, client_cert=(client_cert, client_key), - verify=True, + verify=verify, ssl_version=ssl_version, assert_hostname=assert_hostname, ) diff --git a/airflow/providers/docker/operators/docker.py b/airflow/providers/docker/operators/docker.py index 634662b1d4167..a626375e216e8 100644 --- a/airflow/providers/docker/operators/docker.py +++ b/airflow/providers/docker/operators/docker.py @@ -112,6 +112,7 @@ class DockerOperator(BaseOperator): :param tls_client_cert: Path to the PEM-encoded certificate used to authenticate docker client. :param tls_client_key: Path to the PEM-encoded key used to authenticate docker client. + :param tls_verify: Set ``True`` to verify the validity of the provided certificate. :param tls_hostname: Hostname to match against the docker server certificate or False to disable the check. :param tls_ssl_version: Version of SSL to use when communicating with docker daemon. @@ -186,6 +187,7 @@ def __init__( tls_ca_cert: str | None = None, tls_client_cert: str | None = None, tls_client_key: str | None = None, + tls_verify: bool = True, tls_hostname: str | bool | None = None, tls_ssl_version: str | None = None, mount_tmp_dir: bool = True, @@ -248,6 +250,7 @@ def __init__( self.tls_ca_cert = tls_ca_cert self.tls_client_cert = tls_client_cert self.tls_client_key = tls_client_key + self.tls_verify = tls_verify self.tls_hostname = tls_hostname self.tls_ssl_version = tls_ssl_version self.mount_tmp_dir = mount_tmp_dir @@ -282,6 +285,7 @@ def hook(self) -> DockerHook: ca_cert=self.tls_ca_cert, client_cert=self.tls_client_cert, client_key=self.tls_client_key, + verify=self.tls_verify, assert_hostname=self.tls_hostname, ssl_version=self.tls_ssl_version, ) diff --git a/airflow/providers/google/CHANGELOG.rst b/airflow/providers/google/CHANGELOG.rst index 6129659bad43e..516c67c46c908 100644 --- a/airflow/providers/google/CHANGELOG.rst +++ b/airflow/providers/google/CHANGELOG.rst @@ -23,6 +23,16 @@ Changelog --------- +10.0.0 +...... + +Breaking changes +~~~~~~~~~~~~~~~~ + +Google has announced sunset of Campaign Manager 360 v3.5 by Apr 20, 2023. For more information +please check: ``_ . As a result, the +default api version for Campaign Manager 360 operator was updated to the latest v4 version. + 9.0.0 ..... 
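A short, hedged usage sketch for the tls_verify flag added to DockerOperator above; the daemon URL, image and certificate paths are hypothetical. Setting tls_verify=False is passed through to DockerHook.construct_tls_config as verify=False, so the daemon certificate is not validated while the client certificate and key are still presented.

    from airflow.providers.docker.operators.docker import DockerOperator

    run_in_container = DockerOperator(
        task_id="run_in_container",
        image="python:3.10-slim",
        command="python -c 'print(1)'",
        docker_url="tcp://docker-host:2376",  # hypothetical TLS-enabled daemon
        tls_ca_cert="/certs/ca.pem",
        tls_client_cert="/certs/client/cert.pem",
        tls_client_key="/certs/client/key.pem",
        tls_verify=False,
    )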
diff --git a/airflow/providers/google/cloud/operators/bigquery.py b/airflow/providers/google/cloud/operators/bigquery.py index 9de5663420bf6..092290a77fa30 100644 --- a/airflow/providers/google/cloud/operators/bigquery.py +++ b/airflow/providers/google/cloud/operators/bigquery.py @@ -905,6 +905,7 @@ def execute(self, context: Context): schema: dict[str, list] = hook.get_schema( dataset_id=self.dataset_id, table_id=self.table_id, + project_id=self.project_id, ) if "fields" in schema: self.selected_fields = ",".join([field["name"] for field in schema["fields"]]) diff --git a/airflow/providers/google/marketing_platform/example_dags/example_display_video.py b/airflow/providers/google/marketing_platform/example_dags/example_display_video.py index 7e6c3c38ac738..9caf6eb509480 100644 --- a/airflow/providers/google/marketing_platform/example_dags/example_display_video.py +++ b/airflow/providers/google/marketing_platform/example_dags/example_display_video.py @@ -30,20 +30,16 @@ from airflow.providers.google.marketing_platform.hooks.display_video import GoogleDisplayVideo360Hook from airflow.providers.google.marketing_platform.operators.display_video import ( GoogleDisplayVideo360CreateQueryOperator, - GoogleDisplayVideo360CreateReportOperator, GoogleDisplayVideo360CreateSDFDownloadTaskOperator, GoogleDisplayVideo360DeleteReportOperator, GoogleDisplayVideo360DownloadLineItemsOperator, - GoogleDisplayVideo360DownloadReportOperator, GoogleDisplayVideo360DownloadReportV2Operator, GoogleDisplayVideo360RunQueryOperator, - GoogleDisplayVideo360RunReportOperator, GoogleDisplayVideo360SDFtoGCSOperator, GoogleDisplayVideo360UploadLineItemsOperator, ) from airflow.providers.google.marketing_platform.sensors.display_video import ( GoogleDisplayVideo360GetSDFDownloadOperationSensor, - GoogleDisplayVideo360ReportSensor, GoogleDisplayVideo360RunQuerySensor, ) @@ -60,24 +56,6 @@ ENTITY_TYPE = os.environ.get("GMP_ENTITY_TYPE", "LineItem") ERF_SOURCE_OBJECT = GoogleDisplayVideo360Hook.erf_uri(GMP_PARTNER_ID, ENTITY_TYPE) -REPORT = { - "kind": "doubleclickbidmanager#query", - "metadata": { - "title": "Polidea Test Report", - "dataRange": "LAST_7_DAYS", - "format": "CSV", - "sendNotification": False, - }, - "params": { - "type": "TYPE_GENERAL", - "groupBys": ["FILTER_DATE", "FILTER_PARTNER"], - "filters": [{"type": "FILTER_PARTNER", "value": 1486931}], - "metrics": ["METRIC_IMPRESSIONS", "METRIC_CLICKS"], - "includeInviteData": True, - }, - "schedule": {"frequency": "ONE_TIME"}, -} - REPORT_V2 = { "metadata": { "title": "Airflow Test Report", @@ -109,48 +87,6 @@ START_DATE = datetime(2021, 1, 1) -with models.DAG( - "example_display_video", - start_date=START_DATE, - catchup=False, -) as dag1: - # [START howto_google_display_video_createquery_report_operator] - create_report = GoogleDisplayVideo360CreateReportOperator(body=REPORT, task_id="create_report") - report_id = cast(str, XComArg(create_report, key="report_id")) - # [END howto_google_display_video_createquery_report_operator] - - # [START howto_google_display_video_runquery_report_operator] - run_report = GoogleDisplayVideo360RunReportOperator( - report_id=report_id, parameters=PARAMETERS, task_id="run_report" - ) - # [END howto_google_display_video_runquery_report_operator] - - # [START howto_google_display_video_wait_report_operator] - wait_for_report = GoogleDisplayVideo360ReportSensor(task_id="wait_for_report", report_id=report_id) - # [END howto_google_display_video_wait_report_operator] - - # [START howto_google_display_video_getquery_report_operator] - 
get_report = GoogleDisplayVideo360DownloadReportOperator( - report_id=report_id, - task_id="get_report", - bucket_name=BUCKET, - report_name="test1.csv", - ) - # [END howto_google_display_video_getquery_report_operator] - - # [START howto_google_display_video_deletequery_report_operator] - delete_report = GoogleDisplayVideo360DeleteReportOperator(report_id=report_id, task_id="delete_report") - # [END howto_google_display_video_deletequery_report_operator] - - run_report >> wait_for_report >> get_report >> delete_report - - # Task dependencies created via `XComArgs`: - # create_report >> run_report - # create_report >> wait_for_report - # create_report >> get_report - # create_report >> delete_report - - with models.DAG( "example_display_video_misc", start_date=START_DATE, diff --git a/airflow/providers/google/marketing_platform/hooks/campaign_manager.py b/airflow/providers/google/marketing_platform/hooks/campaign_manager.py index f64d3f3b3cf6a..f0a02ddd255e3 100644 --- a/airflow/providers/google/marketing_platform/hooks/campaign_manager.py +++ b/airflow/providers/google/marketing_platform/hooks/campaign_manager.py @@ -34,7 +34,7 @@ class GoogleCampaignManagerHook(GoogleBaseHook): def __init__( self, - api_version: str = "v3.3", + api_version: str = "v4", gcp_conn_id: str = "google_cloud_default", delegate_to: str | None = None, impersonation_chain: str | Sequence[str] | None = None, @@ -236,7 +236,7 @@ def conversions_batch_insert( :param profile_id: User profile ID associated with this request. :param conversions: Conversations to insert, should by type of Conversation: - https://developers.google.com/doubleclick-advertisers/v3.3/conversions#resource + https://developers.google.com/doubleclick-advertisers/rest/v4/conversions/batchinsert :param encryption_entity_type: The encryption entity type. This should match the encryption configuration for ad serving or Data Transfer. :param encryption_entity_id: The encryption entity ID. This should match the encryption @@ -280,7 +280,7 @@ def conversions_batch_update( :param profile_id: User profile ID associated with this request. :param conversions: Conversations to update, should by type of Conversation: - https://developers.google.com/doubleclick-advertisers/v3.3/conversions#resource + https://developers.google.com/doubleclick-advertisers/rest/v4/conversions/batchupdate :param encryption_entity_type: The encryption entity type. This should match the encryption configuration for ad serving or Data Transfer. :param encryption_entity_id: The encryption entity ID. 
This should match the encryption diff --git a/airflow/providers/google/marketing_platform/hooks/display_video.py b/airflow/providers/google/marketing_platform/hooks/display_video.py index 9422426fbe3ad..d7927f709e547 100644 --- a/airflow/providers/google/marketing_platform/hooks/display_video.py +++ b/airflow/providers/google/marketing_platform/hooks/display_video.py @@ -18,7 +18,6 @@ """This module contains Google DisplayVideo hook.""" from __future__ import annotations -import warnings from typing import Any, Sequence from googleapiclient.discovery import Resource, build @@ -43,11 +42,6 @@ def __init__( delegate_to=delegate_to, impersonation_chain=impersonation_chain, ) - if api_version in ["v1", "v1.1"]: - warnings.warn( - f"API {api_version} is deprecated and shortly will be removed please use v2", - DeprecationWarning, - ) self.api_version = api_version def get_conn(self) -> Resource: @@ -99,10 +93,7 @@ def create_query(self, query: dict[str, Any]) -> dict: :param query: Query object to be passed to request body. """ - if self.api_version in ["v1", "v1.1"]: - response = self.get_conn().queries().createquery(body=query).execute(num_retries=self.num_retries) - else: - response = self.get_conn().queries().create(body=query).execute(num_retries=self.num_retries) + response = self.get_conn().queries().create(body=query).execute(num_retries=self.num_retries) return response def delete_query(self, query_id: str) -> None: @@ -111,10 +102,7 @@ def delete_query(self, query_id: str) -> None: :param query_id: Query ID to delete. """ - if self.api_version in ["v1", "v1.1"]: - self.get_conn().queries().deletequery(queryId=query_id).execute(num_retries=self.num_retries) - else: - self.get_conn().queries().delete(queryId=query_id).execute(num_retries=self.num_retries) + self.get_conn().queries().delete(queryId=query_id).execute(num_retries=self.num_retries) def get_query(self, query_id: str) -> dict: """ @@ -122,20 +110,12 @@ def get_query(self, query_id: str) -> dict: :param query_id: Query ID to retrieve. """ - if self.api_version in ["v1", "v1.1"]: - response = ( - self.get_conn().queries().getquery(queryId=query_id).execute(num_retries=self.num_retries) - ) - else: - response = self.get_conn().queries().get(queryId=query_id).execute(num_retries=self.num_retries) + response = self.get_conn().queries().get(queryId=query_id).execute(num_retries=self.num_retries) return response def list_queries(self) -> list[dict]: """Retrieves stored queries.""" - if self.api_version in ["v1", "v1.1"]: - response = self.get_conn().queries().listqueries().execute(num_retries=self.num_retries) - else: - response = self.get_conn().queries().list().execute(num_retries=self.num_retries) + response = self.get_conn().queries().list().execute(num_retries=self.num_retries) return response.get("queries", []) def run_query(self, query_id: str, params: dict[str, Any] | None) -> dict: @@ -145,20 +125,9 @@ def run_query(self, query_id: str, params: dict[str, Any] | None) -> dict: :param query_id: Query ID to run. :param params: Parameters for the report. 
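With the deprecated v1/v1.1 branches being dropped from the Display & Video 360 hook in this hunk, run_query always goes through the queries().run() endpoint; a hedged sketch of calling it directly through the hook (the connection id, query id and request body are illustrative only):

    from airflow.providers.google.marketing_platform.hooks.display_video import GoogleDisplayVideo360Hook

    hook = GoogleDisplayVideo360Hook(gcp_conn_id="google_cloud_default", api_version="v2")
    # Body shape loosely follows the Bid Manager v2 queries.run request; adjust to the real report definition.
    hook.run_query(query_id="12345678", params={"dataRange": {"range": "LAST_7_DAYS"}})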
""" - if self.api_version in ["v1", "v1.1"]: - return ( - self.get_conn() - .queries() - .runquery(queryId=query_id, body=params) - .execute(num_retries=self.num_retries) - ) - else: - return ( - self.get_conn() - .queries() - .run(queryId=query_id, body=params) - .execute(num_retries=self.num_retries) - ) + return ( + self.get_conn().queries().run(queryId=query_id, body=params).execute(num_retries=self.num_retries) + ) def get_report(self, query_id: str, report_id: str) -> dict: """ diff --git a/airflow/providers/google/marketing_platform/operators/campaign_manager.py b/airflow/providers/google/marketing_platform/operators/campaign_manager.py index e9dcfc95dd302..739d1328892ae 100644 --- a/airflow/providers/google/marketing_platform/operators/campaign_manager.py +++ b/airflow/providers/google/marketing_platform/operators/campaign_manager.py @@ -40,7 +40,7 @@ class GoogleCampaignManagerDeleteReportOperator(BaseOperator): .. seealso:: Check official API docs: - https://developers.google.com/doubleclick-advertisers/v3.3/reports/delete + `https://developers.google.com/doubleclick-advertisers/rest/v4/reports/delete` .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -49,7 +49,7 @@ class GoogleCampaignManagerDeleteReportOperator(BaseOperator): :param profile_id: The DFA user profile ID. :param report_name: The name of the report to delete. :param report_id: The ID of the report. - :param api_version: The version of the api that will be requested for example 'v3'. + :param api_version: The version of the api that will be requested, for example 'v4'. :param gcp_conn_id: The connection ID to use when fetching connection info. :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have @@ -80,7 +80,7 @@ def __init__( profile_id: str, report_name: str | None = None, report_id: str | None = None, - api_version: str = "v3.3", + api_version: str = "v4", gcp_conn_id: str = "google_cloud_default", delegate_to: str | None = None, impersonation_chain: str | Sequence[str] | None = None, @@ -127,7 +127,7 @@ class GoogleCampaignManagerDownloadReportOperator(BaseOperator): .. seealso:: Check official API docs: - https://developers.google.com/doubleclick-advertisers/v3.3/reports/files/get + `https://developers.google.com/doubleclick-advertisers/rest/v4/reports/get` .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -140,7 +140,7 @@ class GoogleCampaignManagerDownloadReportOperator(BaseOperator): :param report_name: The report name to set when uploading the local file. :param gzip: Option to compress local file or file data for upload :param chunk_size: File will be downloaded in chunks of this many bytes. - :param api_version: The version of the api that will be requested for example 'v3'. + :param api_version: The version of the api that will be requested, for example 'v4'. :param gcp_conn_id: The connection ID to use when fetching connection info. :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. 
For this to work, the service account making the request must have @@ -178,7 +178,7 @@ def __init__( report_name: str | None = None, gzip: bool = True, chunk_size: int = 10 * 1024 * 1024, - api_version: str = "v3.3", + api_version: str = "v4", gcp_conn_id: str = "google_cloud_default", delegate_to: str | None = None, impersonation_chain: str | Sequence[str] | None = None, @@ -259,7 +259,7 @@ class GoogleCampaignManagerInsertReportOperator(BaseOperator): .. seealso:: Check official API docs: - https://developers.google.com/doubleclick-advertisers/v3.3/reports/insert + `https://developers.google.com/doubleclick-advertisers/rest/v4/reports/insert` .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -267,7 +267,7 @@ class GoogleCampaignManagerInsertReportOperator(BaseOperator): :param profile_id: The DFA user profile ID. :param report: Report to be created. - :param api_version: The version of the api that will be requested for example 'v3'. + :param api_version: The version of the api that will be requested, for example 'v4'. :param gcp_conn_id: The connection ID to use when fetching connection info. :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have @@ -298,7 +298,7 @@ def __init__( *, profile_id: str, report: dict[str, Any], - api_version: str = "v3.3", + api_version: str = "v4", gcp_conn_id: str = "google_cloud_default", delegate_to: str | None = None, impersonation_chain: str | Sequence[str] | None = None, @@ -339,7 +339,7 @@ class GoogleCampaignManagerRunReportOperator(BaseOperator): .. seealso:: Check official API docs: - https://developers.google.com/doubleclick-advertisers/v3.3/reports/run + `https://developers.google.com/doubleclick-advertisers/rest/v4/reports/run` .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -348,7 +348,7 @@ class GoogleCampaignManagerRunReportOperator(BaseOperator): :param profile_id: The DFA profile ID. :param report_id: The ID of the report. :param synchronous: If set and true, tries to run the report synchronously. - :param api_version: The version of the api that will be requested for example 'v3'. + :param api_version: The version of the api that will be requested, for example 'v4'. :param gcp_conn_id: The connection ID to use when fetching connection info. :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have @@ -379,7 +379,7 @@ def __init__( profile_id: str, report_id: str, synchronous: bool = False, - api_version: str = "v3.3", + api_version: str = "v4", gcp_conn_id: str = "google_cloud_default", delegate_to: str | None = None, impersonation_chain: str | Sequence[str] | None = None, @@ -419,15 +419,15 @@ class GoogleCampaignManagerBatchInsertConversionsOperator(BaseOperator): .. seealso:: Check official API docs: - https://developers.google.com/doubleclick-advertisers/v3.3/conversions/batchinsert + `https://developers.google.com/doubleclick-advertisers/rest/v4/conversions/batchinsert` .. seealso:: For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:GoogleCampaignManagerBatchInsertConversionsOperator` :param profile_id: User profile ID associated with this request. 
- :param conversions: Conversations to insert, should by type of Conversation: - https://developers.google.com/doubleclick-advertisers/v3.3/conversions#resource + :param conversions: Conversions to insert, should be type of Conversion: + https://developers.google.com/doubleclick-advertisers/rest/v4/conversions :param encryption_entity_type: The encryption entity type. This should match the encryption configuration for ad serving or Data Transfer. :param encryption_entity_id: The encryption entity ID. This should match the encryption @@ -435,7 +435,7 @@ class GoogleCampaignManagerBatchInsertConversionsOperator(BaseOperator): :param encryption_source: Describes whether the encrypted cookie was received from ad serving (the %m macro) or from Data Transfer. :param max_failed_inserts: The maximum number of conversions that failed to be inserted - :param api_version: The version of the api that will be requested for example 'v3'. + :param api_version: The version of the api that will be requested, for example 'v4'. :param gcp_conn_id: The connection ID to use when fetching connection info. :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have @@ -468,7 +468,7 @@ def __init__( encryption_entity_id: int, encryption_source: str, max_failed_inserts: int = 0, - api_version: str = "v3.3", + api_version: str = "v4", gcp_conn_id: str = "google_cloud_default", delegate_to: str | None = None, impersonation_chain: str | Sequence[str] | None = None, @@ -510,15 +510,15 @@ class GoogleCampaignManagerBatchUpdateConversionsOperator(BaseOperator): .. seealso:: Check official API docs: - https://developers.google.com/doubleclick-advertisers/v3.3/conversions/batchupdate + `https://developers.google.com/doubleclick-advertisers/rest/v4/conversions/batchupdate` .. seealso:: For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:GoogleCampaignManagerBatchUpdateConversionsOperator` :param profile_id: User profile ID associated with this request. - :param conversions: Conversations to update, should by type of Conversation: - https://developers.google.com/doubleclick-advertisers/v3.3/conversions#resource + :param conversions: Conversations to update, should be type of Conversion: + https://developers.google.com/doubleclick-advertisers/rest/v4/conversions :param encryption_entity_type: The encryption entity type. This should match the encryption configuration for ad serving or Data Transfer. :param encryption_entity_id: The encryption entity ID. This should match the encryption @@ -526,7 +526,7 @@ class GoogleCampaignManagerBatchUpdateConversionsOperator(BaseOperator): :param encryption_source: Describes whether the encrypted cookie was received from ad serving (the %m macro) or from Data Transfer. :param max_failed_updates: The maximum number of conversions that failed to be updated - :param api_version: The version of the api that will be requested for example 'v3'. + :param api_version: The version of the api that will be requested, for example 'v4'. :param gcp_conn_id: The connection ID to use when fetching connection info. :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. 
For this to work, the service account making the request must have @@ -559,7 +559,7 @@ def __init__( encryption_entity_id: int, encryption_source: str, max_failed_updates: int = 0, - api_version: str = "v3.3", + api_version: str = "v4", gcp_conn_id: str = "google_cloud_default", delegate_to: str | None = None, impersonation_chain: str | Sequence[str] | None = None, diff --git a/airflow/providers/google/marketing_platform/operators/display_video.py b/airflow/providers/google/marketing_platform/operators/display_video.py index 738d734bade17..003ad4723c67a 100644 --- a/airflow/providers/google/marketing_platform/operators/display_video.py +++ b/airflow/providers/google/marketing_platform/operators/display_video.py @@ -23,7 +23,6 @@ import shutil import tempfile import urllib.request -import warnings from typing import TYPE_CHECKING, Any, Sequence from urllib.parse import urlsplit @@ -36,84 +35,6 @@ from airflow.utils.context import Context -class GoogleDisplayVideo360CreateReportOperator(BaseOperator): - """ - Creates a query. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:GoogleDisplayVideo360CreateReportOperator` - - .. seealso:: - Check also the official API docs: - `https://developers.google.com/bid-manager/v1/queries/createquery` - - :param body: Report object passed to the request's body as described here: - https://developers.google.com/bid-manager/v1/queries#resource - :param api_version: The version of the api that will be requested for example 'v3'. - :param gcp_conn_id: The connection ID to use when fetching connection info. - :param delegate_to: The account to impersonate using domain-wide delegation of authority, - if any. For this to work, the service account making the request must have - domain-wide delegation enabled. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields: Sequence[str] = ( - "body", - "impersonation_chain", - ) - template_ext: Sequence[str] = (".json",) - - def __init__( - self, - *, - body: dict[str, Any], - api_version: str = "v1", - gcp_conn_id: str = "google_cloud_default", - delegate_to: str | None = None, - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.body = body - - warnings.warn( - "This operator is deprecated. 
Please use `GoogleDisplayVideo360CreateQueryOperator`", - DeprecationWarning, - ) - self.api_version = api_version - self.gcp_conn_id = gcp_conn_id - self.delegate_to = delegate_to - self.impersonation_chain = impersonation_chain - - def prepare_template(self) -> None: - # If .json is passed then we have to read the file - if isinstance(self.body, str) and self.body.endswith(".json"): - with open(self.body) as file: - self.body = json.load(file) - - def execute(self, context: Context) -> dict: - hook = GoogleDisplayVideo360Hook( - gcp_conn_id=self.gcp_conn_id, - delegate_to=self.delegate_to, - api_version=self.api_version, - impersonation_chain=self.impersonation_chain, - ) - self.log.info("Creating Display & Video 360 report.") - response = hook.create_query(query=self.body) - report_id = response["queryId"] - self.xcom_push(context, key="report_id", value=report_id) - self.log.info("Created report with ID: %s", report_id) - return response - - class GoogleDisplayVideo360CreateQueryOperator(BaseOperator): """ Creates a query. @@ -235,11 +156,6 @@ def __init__( super().__init__(**kwargs) self.report_id = report_id self.report_name = report_name - if api_version in ["v1", "v1.1"]: - warnings.warn( - f"API {api_version} is deprecated and shortly will be removed please use v2", - DeprecationWarning, - ) self.api_version = api_version self.gcp_conn_id = gcp_conn_id self.delegate_to = delegate_to @@ -272,132 +188,6 @@ def execute(self, context: Context) -> None: self.log.info("Report deleted.") -class GoogleDisplayVideo360DownloadReportOperator(BaseOperator): - """ - Retrieves a stored query. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:GoogleDisplayVideo360DownloadReportOperator` - - .. seealso:: - Check also the official API docs: - `https://developers.google.com/bid-manager/v1/queries/getquery` - - :param report_id: Report ID to retrieve. - :param bucket_name: The bucket to upload to. - :param report_name: The report name to set when uploading the local file. - :param chunk_size: File will be downloaded in chunks of this many bytes. - :param gzip: Option to compress local file or file data for upload - :param api_version: The version of the api that will be requested for example 'v3'. - :param gcp_conn_id: The connection ID to use when fetching connection info. - :param delegate_to: The account to impersonate using domain-wide delegation of authority, - if any. For this to work, the service account making the request must have - domain-wide delegation enabled. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "report_id", - "bucket_name", - "report_name", - "impersonation_chain", - ) - - def __init__( - self, - *, - report_id: str, - bucket_name: str, - report_name: str | None = None, - gzip: bool = True, - chunk_size: int = 10 * 1024 * 1024, - api_version: str = "v1", - gcp_conn_id: str = "google_cloud_default", - delegate_to: str | None = None, - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - warnings.warn( - "This operator is deprecated. Please use `GoogleDisplayVideo360DownloadReportV2Operator`", - DeprecationWarning, - ) - self.report_id = report_id - self.chunk_size = chunk_size - self.gzip = gzip - self.bucket_name = bucket_name - self.report_name = report_name - self.api_version = api_version - self.gcp_conn_id = gcp_conn_id - self.delegate_to = delegate_to - self.impersonation_chain = impersonation_chain - - def _resolve_file_name(self, name: str) -> str: - new_name = name if name.endswith(".csv") else f"{name}.csv" - new_name = f"{new_name}.gz" if self.gzip else new_name - return new_name - - @staticmethod - def _set_bucket_name(name: str) -> str: - bucket = name if not name.startswith("gs://") else name[5:] - return bucket.strip("/") - - def execute(self, context: Context): - hook = GoogleDisplayVideo360Hook( - gcp_conn_id=self.gcp_conn_id, - delegate_to=self.delegate_to, - api_version=self.api_version, - impersonation_chain=self.impersonation_chain, - ) - gcs_hook = GCSHook( - gcp_conn_id=self.gcp_conn_id, - delegate_to=self.delegate_to, - impersonation_chain=self.impersonation_chain, - ) - - resource = hook.get_query(query_id=self.report_id) - # Check if report is ready - if resource["metadata"]["running"]: - raise AirflowException(f"Report {self.report_id} is still running") - - # If no custom report_name provided, use DV360 name - file_url = resource["metadata"]["googleCloudStoragePathForLatestReport"] - report_name = self.report_name or urlsplit(file_url).path.split("/")[-1] - report_name = self._resolve_file_name(report_name) - - # Download the report - self.log.info("Starting downloading report %s", self.report_id) - with tempfile.NamedTemporaryFile(delete=False) as temp_file: - with urllib.request.urlopen(file_url) as response: - shutil.copyfileobj(response, temp_file, length=self.chunk_size) - - temp_file.flush() - # Upload the local file to bucket - bucket_name = self._set_bucket_name(self.bucket_name) - gcs_hook.upload( - bucket_name=bucket_name, - object_name=report_name, - gzip=self.gzip, - filename=temp_file.name, - mime_type="text/csv", - ) - self.log.info( - "Report %s was saved in bucket %s as %s.", - self.report_id, - self.bucket_name, - report_name, - ) - self.xcom_push(context, key="report_name", value=report_name) - - class GoogleDisplayVideo360DownloadReportV2Operator(BaseOperator): """ Retrieves a stored query. @@ -523,80 +313,6 @@ def execute(self, context: Context): self.xcom_push(context, key="report_name", value=report_name) -class GoogleDisplayVideo360RunReportOperator(BaseOperator): - """ - Runs a stored query to generate a report. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:GoogleDisplayVideo360RunReportOperator` - - .. seealso:: - Check also the official API docs: - `https://developers.google.com/bid-manager/v1/queries/runquery` - - :param report_id: Report ID to run. 
- :param parameters: Parameters for running a report as described here: - https://developers.google.com/bid-manager/v1/queries/runquery - :param api_version: The version of the api that will be requested for example 'v3'. - :param gcp_conn_id: The connection ID to use when fetching connection info. - :param delegate_to: The account to impersonate using domain-wide delegation of authority, - if any. For this to work, the service account making the request must have - domain-wide delegation enabled. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields: Sequence[str] = ( - "report_id", - "parameters", - "impersonation_chain", - ) - - def __init__( - self, - *, - report_id: str, - parameters: dict[str, Any] | None = None, - api_version: str = "v1", - gcp_conn_id: str = "google_cloud_default", - delegate_to: str | None = None, - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.report_id = report_id - warnings.warn( - "This operator is deprecated. Please use `GoogleDisplayVideo360RunQueryOperator`", - DeprecationWarning, - ) - self.api_version = api_version - self.gcp_conn_id = gcp_conn_id - self.delegate_to = delegate_to - self.parameters = parameters - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> None: - hook = GoogleDisplayVideo360Hook( - gcp_conn_id=self.gcp_conn_id, - delegate_to=self.delegate_to, - api_version=self.api_version, - impersonation_chain=self.impersonation_chain, - ) - self.log.info( - "Running report %s with the following parameters:\n %s", - self.report_id, - self.parameters, - ) - hook.run_query(query_id=self.report_id, params=self.parameters) - - class GoogleDisplayVideo360RunQueryOperator(BaseOperator): """ Runs a stored query to generate a report. diff --git a/airflow/providers/google/marketing_platform/sensors/campaign_manager.py b/airflow/providers/google/marketing_platform/sensors/campaign_manager.py index d8c0a3166c211..0bc5ae1d8b556 100644 --- a/airflow/providers/google/marketing_platform/sensors/campaign_manager.py +++ b/airflow/providers/google/marketing_platform/sensors/campaign_manager.py @@ -33,7 +33,7 @@ class GoogleCampaignManagerReportSensor(BaseSensorOperator): .. seealso:: Check official API docs: - https://developers.google.com/doubleclick-advertisers/v3.3/reports/get + https://developers.google.com/doubleclick-advertisers/rest/v4/reports/get .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -42,7 +42,7 @@ class GoogleCampaignManagerReportSensor(BaseSensorOperator): :param profile_id: The DFA user profile ID. :param report_id: The ID of the report. :param file_id: The ID of the report file. - :param api_version: The version of the api that will be requested for example 'v3'. + :param api_version: The version of the api that will be requested, for example 'v4'. :param gcp_conn_id: The connection ID to use when fetching connection info. 
:param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have @@ -81,7 +81,7 @@ def __init__( profile_id: str, report_id: str, file_id: str, - api_version: str = "v3.3", + api_version: str = "v4", gcp_conn_id: str = "google_cloud_default", delegate_to: str | None = None, mode: str = "reschedule", diff --git a/airflow/providers/google/marketing_platform/sensors/display_video.py b/airflow/providers/google/marketing_platform/sensors/display_video.py index d4d10ba9552de..bcbb407af8fe1 100644 --- a/airflow/providers/google/marketing_platform/sensors/display_video.py +++ b/airflow/providers/google/marketing_platform/sensors/display_video.py @@ -17,7 +17,6 @@ """Sensor for detecting the completion of DV360 reports.""" from __future__ import annotations -import warnings from typing import TYPE_CHECKING, Sequence from airflow import AirflowException @@ -28,70 +27,6 @@ from airflow.utils.context import Context -class GoogleDisplayVideo360ReportSensor(BaseSensorOperator): - """ - Sensor for detecting the completion of DV360 reports. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:GoogleDisplayVideo360ReportSensor` - - :param report_id: Report ID to delete. - :param api_version: The version of the api that will be requested for example 'v3'. - :param gcp_conn_id: The connection ID to use when fetching connection info. - :param delegate_to: The account to impersonate using domain-wide delegation of authority, - if any. For this to work, the service account making the request must have - domain-wide delegation enabled. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields: Sequence[str] = ( - "report_id", - "impersonation_chain", - ) - - def __init__( - self, - *, - report_id: str, - api_version: str = "v1", - gcp_conn_id: str = "google_cloud_default", - delegate_to: str | None = None, - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - warnings.warn( - "This operator is deprecated. Please use `GoogleDisplayVideo360RunQuerySensor`", - DeprecationWarning, - ) - self.report_id = report_id - self.api_version = api_version - self.gcp_conn_id = gcp_conn_id - self.delegate_to = delegate_to - self.impersonation_chain = impersonation_chain - - def poke(self, context: Context) -> bool: - hook = GoogleDisplayVideo360Hook( - gcp_conn_id=self.gcp_conn_id, - delegate_to=self.delegate_to, - api_version=self.api_version, - impersonation_chain=self.impersonation_chain, - ) - - response = hook.get_query(query_id=self.report_id) - if response and not response.get("metadata", {}).get("running"): - return True - return False - - class GoogleDisplayVideo360GetSDFDownloadOperationSensor(BaseSensorOperator): """ Sensor for detecting the completion of SDF operation. 
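The hunks above move every Campaign Manager operator and sensor from the retired `v3.3` endpoints to `v4` while keeping `api_version` as an overridable argument. Below is a minimal, hedged sketch of a DAG exercising the new default; the profile ID, the report body shape, the `report_id` XCom key, and the assumption that `report_id` is a templated field are illustrative only and not guaranteed by this diff.

```python
# Hedged sketch: Campaign Manager operators now default to api_version="v4".
from __future__ import annotations

from datetime import datetime

from airflow import DAG
from airflow.providers.google.marketing_platform.operators.campaign_manager import (
    GoogleCampaignManagerInsertReportOperator,
    GoogleCampaignManagerRunReportOperator,
)

PROFILE_ID = "1234567"  # placeholder DFA user profile ID
REPORT_BODY = {"name": "example_report", "type": "STANDARD"}  # assumed minimal report resource

with DAG(
    dag_id="example_campaign_manager_v4",
    start_date=datetime(2023, 1, 1),
    schedule=None,
    catchup=False,
):
    # api_version is not passed, so the new "v4" default from this change applies.
    insert_report = GoogleCampaignManagerInsertReportOperator(
        task_id="insert_report",
        profile_id=PROFILE_ID,
        report=REPORT_BODY,
    )

    # Assumes the insert task exposes the created report id under the "report_id" XCom key
    # and that report_id is templated; pass api_version explicitly (e.g. "v3.5") if you
    # need to stay on an older endpoint.
    run_report = GoogleCampaignManagerRunReportOperator(
        task_id="run_report",
        profile_id=PROFILE_ID,
        report_id="{{ ti.xcom_pull(task_ids='insert_report', key='report_id') }}",
        synchronous=False,
    )

    insert_report >> run_report
```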
diff --git a/airflow/providers/google/provider.yaml b/airflow/providers/google/provider.yaml index 6b3006c8c0229..6211b2d60d918 100644 --- a/airflow/providers/google/provider.yaml +++ b/airflow/providers/google/provider.yaml @@ -30,6 +30,7 @@ description: | suspended: false versions: + - 10.0.0 - 9.0.0 - 8.12.0 - 8.11.0 diff --git a/airflow/providers/microsoft/azure/sensors/wasb.py b/airflow/providers/microsoft/azure/sensors/wasb.py index 017d73720dc92..77f2256a64de7 100644 --- a/airflow/providers/microsoft/azure/sensors/wasb.py +++ b/airflow/providers/microsoft/azure/sensors/wasb.py @@ -17,6 +17,7 @@ # under the License. from __future__ import annotations +import warnings from datetime import timedelta from typing import TYPE_CHECKING, Any, Sequence @@ -38,6 +39,8 @@ class WasbBlobSensor(BaseSensorOperator): :param wasb_conn_id: Reference to the :ref:`wasb connection `. :param check_options: Optional keyword arguments that `WasbHook.check_for_blob()` takes. + :param deferrable: Run sensor in the deferrable mode. + :param public_read: whether an anonymous public read access should be used. Default is False """ template_fields: Sequence[str] = ("container_name", "blob_name") @@ -49,6 +52,8 @@ def __init__( blob_name: str, wasb_conn_id: str = "wasb_default", check_options: dict | None = None, + public_read: bool = False, + deferrable: bool = False, **kwargs, ) -> None: super().__init__(**kwargs) @@ -58,57 +63,32 @@ def __init__( self.container_name = container_name self.blob_name = blob_name self.check_options = check_options + self.public_read = public_read + self.deferrable = deferrable def poke(self, context: Context): self.log.info("Poking for blob: %s\n in wasb://%s", self.blob_name, self.container_name) hook = WasbHook(wasb_conn_id=self.wasb_conn_id) return hook.check_for_blob(self.container_name, self.blob_name, **self.check_options) - -class WasbBlobAsyncSensor(WasbBlobSensor): - """ - Polls asynchronously for the existence of a blob in a WASB container. - - :param container_name: name of the container in which the blob should be searched for - :param blob_name: name of the blob to check existence for - :param wasb_conn_id: the connection identifier for connecting to Azure WASB - :param poke_interval: polling period in seconds to check for the status - :param public_read: whether an anonymous public read access should be used. Default is False - :param timeout: Time, in seconds before the task times out and fails. 
- """ - - def __init__( - self, - *, - container_name: str, - blob_name: str, - wasb_conn_id: str = "wasb_default", - public_read: bool = False, - poke_interval: float = 5.0, - **kwargs: Any, - ): - self.container_name = container_name - self.blob_name = blob_name - self.poke_interval = poke_interval - super().__init__(container_name=container_name, blob_name=blob_name, **kwargs) - self.wasb_conn_id = wasb_conn_id - self.public_read = public_read - def execute(self, context: Context) -> None: - """Defers trigger class to poll for state of the job run until it reaches - a failure state or success state + """Defers trigger class to poll for state of the job run until + it reaches a failure state or success state """ - self.defer( - timeout=timedelta(seconds=self.timeout), - trigger=WasbBlobSensorTrigger( - container_name=self.container_name, - blob_name=self.blob_name, - wasb_conn_id=self.wasb_conn_id, - public_read=self.public_read, - poke_interval=self.poke_interval, - ), - method_name="execute_complete", - ) + if not self.deferrable: + super().execute(context=context) + else: + self.defer( + timeout=timedelta(seconds=self.timeout), + trigger=WasbBlobSensorTrigger( + container_name=self.container_name, + blob_name=self.blob_name, + wasb_conn_id=self.wasb_conn_id, + public_read=self.public_read, + poke_interval=self.poke_interval, + ), + method_name="execute_complete", + ) def execute_complete(self, context: Context, event: dict[str, str]) -> None: """ @@ -124,6 +104,30 @@ def execute_complete(self, context: Context, event: dict[str, str]) -> None: raise AirflowException("Did not receive valid event from the triggerer") +class WasbBlobAsyncSensor(WasbBlobSensor): + """ + Polls asynchronously for the existence of a blob in a WASB container. + + :param container_name: name of the container in which the blob should be searched for + :param blob_name: name of the blob to check existence for + :param wasb_conn_id: the connection identifier for connecting to Azure WASB + :param poke_interval: polling period in seconds to check for the status + :param public_read: whether an anonymous public read access should be used. Default is False + :param timeout: Time, in seconds before the task times out and fails. + """ + + def __init__(self, **kwargs: Any) -> None: + warnings.warn( + "Class `WasbBlobAsyncSensor` is deprecated and " + "will be removed in a future release. " + "Please use `WasbBlobSensor` and " + "set `deferrable` attribute to `True` instead", + DeprecationWarning, + stacklevel=2, + ) + super().__init__(**kwargs, deferrable=True) + + class WasbPrefixSensor(BaseSensorOperator): """ Waits for blobs matching a prefix to arrive on Azure Blob Storage. diff --git a/airflow/providers/mysql/hooks/mysql.py b/airflow/providers/mysql/hooks/mysql.py index ea2d912a62c61..e105d8f96a13a 100644 --- a/airflow/providers/mysql/hooks/mysql.py +++ b/airflow/providers/mysql/hooks/mysql.py @@ -19,13 +19,20 @@ from __future__ import annotations import json +import logging from typing import TYPE_CHECKING, Any, Union +from airflow.exceptions import AirflowOptionalProviderFeatureException from airflow.models import Connection from airflow.providers.common.sql.hooks.sql import DbApiHook +logger = logging.getLogger(__name__) + if TYPE_CHECKING: - from mysql.connector.abstracts import MySQLConnectionAbstract + try: + from mysql.connector.abstracts import MySQLConnectionAbstract + except ModuleNotFoundError: + logger.warning("The package 'mysql-connector-python' is not installed. 
Import skipped") from MySQLdb.connections import Connection as MySQLdbConnection MySQLConnectionTypes = Union["MySQLdbConnection", "MySQLConnectionAbstract"] @@ -181,7 +188,14 @@ def get_conn(self) -> MySQLConnectionTypes: return MySQLdb.connect(**conn_config) if client_name == "mysql-connector-python": - import mysql.connector + try: + import mysql.connector + except ModuleNotFoundError: + raise AirflowOptionalProviderFeatureException( + "The pip package 'mysql-connector-python' is not installed, therefore the connection " + "wasn't established. Please, consider using default driver or pip install the package " + "'mysql-connector-python'. Warning! It might cause dependency conflicts." + ) conn_config = self._get_conn_config_mysql_connector_python(conn) return mysql.connector.connect(**conn_config) diff --git a/airflow/providers/mysql/provider.yaml b/airflow/providers/mysql/provider.yaml index ca2a951939509..ca9a30733af13 100644 --- a/airflow/providers/mysql/provider.yaml +++ b/airflow/providers/mysql/provider.yaml @@ -47,7 +47,6 @@ versions: dependencies: - apache-airflow>=2.3.0 - apache-airflow-providers-common-sql>=1.3.1 - - mysql-connector-python>=8.0.11 - mysqlclient>=1.3.6 integrations: @@ -87,3 +86,8 @@ transfers: connection-types: - hook-class-name: airflow.providers.mysql.hooks.mysql.MySqlHook connection-type: mysql + +additional-extras: + - name: mysql-connector-python + dependencies: + - mysql-connector-python>=8.0.11 diff --git a/airflow/providers/openlineage/CHANGELOG.rst b/airflow/providers/openlineage/CHANGELOG.rst new file mode 100644 index 0000000000000..cef7dda80708a --- /dev/null +++ b/airflow/providers/openlineage/CHANGELOG.rst @@ -0,0 +1,25 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Changelog +--------- + +1.0.0 +..... + +Initial version of the provider. diff --git a/airflow/providers/openlineage/__init__.py b/airflow/providers/openlineage/__init__.py new file mode 100644 index 0000000000000..8373a1d08c475 --- /dev/null +++ b/airflow/providers/openlineage/__init__.py @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +__all__ = ["version"] + +try: + import importlib_metadata as metadata +except ImportError: + from importlib import metadata # type: ignore[no-redef] + +try: + version = metadata.version("apache-airflow-providers-openlineage") +except metadata.PackageNotFoundError: + import logging + + log = logging.getLogger(__name__) + log.warning("Package metadata could not be found. Overriding it with version found in setup.py") + # TODO: What should be a proper fallback? + # If hardcoded version from provider version + # there's no point to use metadata above + version = "1.0.0.dev" + +del metadata diff --git a/airflow/providers/openlineage/extractors/__init__.py b/airflow/providers/openlineage/extractors/__init__.py new file mode 100644 index 0000000000000..ff74ee86a481b --- /dev/null +++ b/airflow/providers/openlineage/extractors/__init__.py @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from airflow.providers.openlineage.extractors.base import BaseExtractor, OperatorLineage +from airflow.providers.openlineage.extractors.manager import ExtractorManager + +__all__ = ["BaseExtractor", "OperatorLineage", "ExtractorManager"] diff --git a/airflow/providers/openlineage/extractors/base.py b/airflow/providers/openlineage/extractors/base.py new file mode 100644 index 0000000000000..f89d64a1edf56 --- /dev/null +++ b/airflow/providers/openlineage/extractors/base.py @@ -0,0 +1,102 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +from abc import ABC, abstractmethod + +from attrs import Factory, define + +from airflow.utils.log.logging_mixin import LoggingMixin +from openlineage.client.facet import BaseFacet +from openlineage.client.run import Dataset + + +@define +class OperatorLineage: + """Structure returned from lineage extraction.""" + + inputs: list[Dataset] = Factory(list) + outputs: list[Dataset] = Factory(list) + run_facets: dict[str, BaseFacet] = Factory(dict) + job_facets: dict[str, BaseFacet] = Factory(dict) + + +class BaseExtractor(ABC, LoggingMixin): + """ + Abstract base extractor class. + + This is used mostly to maintain support for custom extractors. + """ + + _allowed_query_params: list[str] = [] + + def __init__(self, operator): # type: ignore + super().__init__() + self.operator = operator + + @classmethod + @abstractmethod + def get_operator_classnames(cls) -> list[str]: + """ + Implement this method returning list of operators that extractor works for. + There are operators which work very similarly and one extractor can cover both. + """ + raise NotImplementedError() + + def validate(self): + assert self.operator.task_type in self.get_operator_classnames() + + @abstractmethod + def extract(self) -> OperatorLineage | None: + pass + + def extract_on_complete(self, task_instance) -> OperatorLineage | None: + return self.extract() + + +class DefaultExtractor(BaseExtractor): + """Extractor that uses `get_openlineage_facets_on_start/complete/failure` methods.""" + + @classmethod + def get_operator_classnames(cls) -> list[str]: + """ + Default extractor is chosen not on the classname basis, but + by existence of get_openlineage_facets method on operator + """ + return [] + + def extract(self) -> OperatorLineage | None: + try: + return self._get_openlineage_facets(self.operator.get_openlineage_facets_on_start) # type: ignore + except AttributeError: + return None + + def extract_on_complete(self, task_instance) -> OperatorLineage | None: + on_complete = getattr(self.operator, "get_openlineage_facets_on_complete", None) + if on_complete and callable(on_complete): + return self._get_openlineage_facets(on_complete, task_instance) + return self.extract() + + def _get_openlineage_facets(self, get_facets_method, *args) -> OperatorLineage | None: + facets: OperatorLineage = get_facets_method(*args) + return OperatorLineage( + inputs=facets.inputs, + outputs=facets.outputs, + run_facets=facets.run_facets, + job_facets=facets.job_facets, + ) diff --git a/airflow/providers/openlineage/extractors/manager.py b/airflow/providers/openlineage/extractors/manager.py new file mode 100644 index 0000000000000..edb21efd3ab4d --- /dev/null +++ b/airflow/providers/openlineage/extractors/manager.py @@ -0,0 +1,161 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import os + +from airflow.providers.openlineage.extractors import BaseExtractor, OperatorLineage +from airflow.providers.openlineage.extractors.base import DefaultExtractor +from airflow.providers.openlineage.plugins.facets import ( + UnknownOperatorAttributeRunFacet, + UnknownOperatorInstance, +) +from airflow.utils.log.logging_mixin import LoggingMixin + + +class ExtractorManager(LoggingMixin): + """Class abstracting management of custom extractors.""" + + def __init__(self): + super().__init__() + self.extractors: dict[str, type[BaseExtractor]] = {} + self.default_extractor = DefaultExtractor + + # Comma-separated extractors in OPENLINEAGE_EXTRACTORS variable. + # Extractors should implement BaseExtractor + from airflow.providers.openlineage.utils import import_from_string + + # TODO: use airflow config with OL backup + env_extractors = os.getenv("OPENLINEAGE_EXTRACTORS") + if env_extractors is not None: + for extractor in env_extractors.split(";"): + extractor: type[BaseExtractor] = import_from_string(extractor.strip()) + for operator_class in extractor.get_operator_classnames(): + self.extractors[operator_class] = extractor + + def add_extractor(self, operator: str, extractor: type[BaseExtractor]): + self.extractors[operator] = extractor + + def extract_metadata(self, dagrun, task, complete: bool = False, task_instance=None) -> OperatorLineage: + extractor = self._get_extractor(task) + task_info = ( + f"task_type={task.task_type} " + f"airflow_dag_id={task.dag_id} " + f"task_id={task.task_id} " + f"airflow_run_id={dagrun.run_id} " + ) + + if extractor: + # Extracting advanced metadata is only possible when extractor for particular operator + # is defined. Without it, we can't extract any input or output data. 
+ try: + self.log.debug("Using extractor %s %s", extractor.__class__.__name__, str(task_info)) + if complete: + task_metadata = extractor.extract_on_complete(task_instance) + else: + task_metadata = extractor.extract() + + self.log.debug("Found task metadata for operation %s: %s", task.task_id, str(task_metadata)) + if task_metadata: + if (not task_metadata.inputs) and (not task_metadata.outputs): + self.extract_inlets_and_outlets(task_metadata, task.inlets, task.outlets) + + return task_metadata + + except Exception as e: + self.log.exception( + "Failed to extract metadata using found extractor %s - %s %s", extractor, e, task_info + ) + else: + self.log.debug("Unable to find an extractor %s", task_info) + + # Only include the unknownSourceAttribute facet if there is no extractor + task_metadata = OperatorLineage( + run_facets={ + "unknownSourceAttribute": UnknownOperatorAttributeRunFacet( + unknownItems=[ + UnknownOperatorInstance( + name=task.task_type, + properties={attr: value for attr, value in task.__dict__.items()}, + ) + ] + ) + }, + ) + inlets = task.get_inlet_defs() + outlets = task.get_outlet_defs() + self.extract_inlets_and_outlets(task_metadata, inlets, outlets) + return task_metadata + + return OperatorLineage() + + def _get_extractor_class(self, clazz: type) -> type[BaseExtractor] | None: + name = clazz.__name__ + if name in self.extractors: + return self.extractors[name] + + def method_exists(method_name): + method = getattr(clazz, method_name, None) + if method: + return callable(method) + + if method_exists("get_openlineage_facets_on_start") or method_exists( + "get_openlineage_facets_on_complete" + ): + return self.default_extractor + return None + + def _get_extractor(self, task) -> BaseExtractor | None: + # TODO: Re-enable in Extractor PR + # self.instantiate_abstract_extractors(task) + extractor = self._get_extractor_class(task.task_type) + self.log.debug("extractor for %s is %s", task.__class__, extractor) + if extractor: + return extractor(task) + return None + + def extract_inlets_and_outlets( + self, + task_metadata: OperatorLineage, + inlets: list, + outlets: list, + ): + self.log.debug("Manually extracting lineage metadata from inlets and outlets") + for i in inlets: + d = self.convert_to_ol_dataset(i) + if d: + task_metadata.inputs.append(d) + for o in outlets: + d = self.convert_to_ol_dataset(o) + if d: + task_metadata.outputs.append(d) + + @staticmethod + def convert_to_ol_dataset(obj): + from airflow.lineage.entities import Table + from openlineage.client.run import Dataset + + if isinstance(obj, Dataset): + return obj + elif isinstance(obj, Table): + return Dataset( + namespace=f"{obj.cluster}", + name=f"{obj.database}.{obj.name}", + facets={}, + ) + else: + return None diff --git a/airflow/providers/openlineage/plugins/__init__.py b/airflow/providers/openlineage/plugins/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/airflow/providers/openlineage/plugins/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/airflow/providers/openlineage/plugins/adapter.py b/airflow/providers/openlineage/plugins/adapter.py new file mode 100644 index 0000000000000..2344839852c67 --- /dev/null +++ b/airflow/providers/openlineage/plugins/adapter.py @@ -0,0 +1,306 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import os +import uuid +from typing import TYPE_CHECKING + +import requests.exceptions + +from airflow.providers.openlineage import version as OPENLINEAGE_PROVIDER_VERSION +from airflow.providers.openlineage.extractors import OperatorLineage +from airflow.providers.openlineage.utils.utils import OpenLineageRedactor +from airflow.utils.log.logging_mixin import LoggingMixin +from openlineage.client import OpenLineageClient, set_producer +from openlineage.client.facet import ( + BaseFacet, + DocumentationJobFacet, + ErrorMessageRunFacet, + NominalTimeRunFacet, + OwnershipJobFacet, + OwnershipJobFacetOwners, + ParentRunFacet, + ProcessingEngineRunFacet, + SourceCodeLocationJobFacet, +) +from openlineage.client.run import Job, Run, RunEvent, RunState + +if TYPE_CHECKING: + from airflow.models.dagrun import DagRun + from airflow.utils.log.secrets_masker import SecretsMasker + +_DAG_DEFAULT_NAMESPACE = "default" + +_DAG_NAMESPACE = os.getenv("OPENLINEAGE_NAMESPACE", _DAG_DEFAULT_NAMESPACE) + +_PRODUCER = f"https://github.com/apache/airflow/tree/providers-openlineage/" f"{OPENLINEAGE_PROVIDER_VERSION}" + +set_producer(_PRODUCER) + + +class OpenLineageAdapter(LoggingMixin): + """ + Adapter for translating Airflow metadata to OpenLineage events, + instead of directly creating them from Airflow code. 
+ """ + + def __init__(self, client: OpenLineageClient | None = None, secrets_masker: SecretsMasker | None = None): + super().__init__() + self._client = client or OpenLineageClient.from_environment() + if not secrets_masker: + from airflow.utils.log.secrets_masker import _secrets_masker + + secrets_masker = _secrets_masker() + self._redacter = OpenLineageRedactor.from_masker(secrets_masker) + + def get_or_create_openlineage_client(self) -> OpenLineageClient: + if not self._client: + self._client = OpenLineageClient.from_environment() + return self._client + + def build_dag_run_id(self, dag_id, dag_run_id): + return str(uuid.uuid3(uuid.NAMESPACE_URL, f"{_DAG_NAMESPACE}.{dag_id}.{dag_run_id}")) + + @staticmethod + def build_task_instance_run_id(task_id, execution_date, try_number): + return str( + uuid.uuid3( + uuid.NAMESPACE_URL, + f"{_DAG_NAMESPACE}.{task_id}.{execution_date}.{try_number}", + ) + ) + + def emit(self, event: RunEvent): + event = self._redacter.redact(event, max_depth=20) + try: + return self._client.emit(event) + except requests.exceptions.RequestException: + self.log.exception(f"Failed to emit OpenLineage event of id {event.run.runId}") + + def start_task( + self, + run_id: str, + job_name: str, + job_description: str, + event_time: str, + parent_job_name: str | None, + parent_run_id: str | None, + code_location: str | None, + nominal_start_time: str, + nominal_end_time: str, + owners: list[str], + task: OperatorLineage | None, + run_facets: dict[str, type[BaseFacet]] | None = None, # Custom run facets + ): + """ + Emits openlineage event of type START + + :param run_id: globally unique identifier of task in dag run + :param job_name: globally unique identifier of task in dag + :param job_description: user provided description of job + :param event_time: + :param parent_job_name: the name of the parent job (typically the DAG, + but possibly a task group) + :param parent_run_id: identifier of job spawning this task + :param code_location: file path or URL of DAG file + :param nominal_start_time: scheduled time of dag run + :param nominal_end_time: following schedule of dag run + :param owners: list of owners of DAG + :param task: metadata container with information extracted from operator + :param run_facets: custom run facets + """ + from airflow.version import version as AIRFLOW_VERSION + + processing_engine_version_facet = ProcessingEngineRunFacet( + version=AIRFLOW_VERSION, + name="Airflow", + openlineageAdapterVersion=OPENLINEAGE_PROVIDER_VERSION, + ) + + if not run_facets: + run_facets = {} + run_facets["processing_engine"] = processing_engine_version_facet # type: ignore + event = RunEvent( + eventType=RunState.START, + eventTime=event_time, + run=self._build_run( + run_id, + parent_job_name, + parent_run_id, + job_name, + nominal_start_time, + nominal_end_time, + run_facets=run_facets, + ), + job=self._build_job( + job_name=job_name, + job_description=job_description, + code_location=code_location, + owners=owners, + job_facets=task.job_facets if task else None, + ), + inputs=task.inputs if task else [], + outputs=task.outputs if task else [], + producer=_PRODUCER, + ) + self.emit(event) + + def complete_task(self, run_id: str, job_name: str, end_time: str, task: OperatorLineage): + """ + Emits openlineage event of type COMPLETE + :param run_id: globally unique identifier of task in dag run + :param job_name: globally unique identifier of task between dags + :param end_time: time of task completion + :param task: metadata container with information extracted from 
operator + """ + event = RunEvent( + eventType=RunState.COMPLETE, + eventTime=end_time, + run=self._build_run(run_id, run_facets=task.run_facets), + job=self._build_job(job_name, job_facets=task.job_facets), + inputs=task.inputs, + outputs=task.outputs, + producer=_PRODUCER, + ) + self.emit(event) + + def fail_task(self, run_id: str, job_name: str, end_time: str, task: OperatorLineage): + """ + Emits openlineage event of type FAIL + :param run_id: globally unique identifier of task in dag run + :param job_name: globally unique identifier of task between dags + :param end_time: time of task completion + :param task: metadata container with information extracted from operator + """ + event = RunEvent( + eventType=RunState.FAIL, + eventTime=end_time, + run=self._build_run(run_id, run_facets=task.run_facets), + job=self._build_job(job_name), + inputs=task.inputs, + outputs=task.outputs, + producer=_PRODUCER, + ) + self.emit(event) + + def dag_started( + self, + dag_run: DagRun, + msg: str, + nominal_start_time: str, + nominal_end_time: str, + ): + event = RunEvent( + eventType=RunState.START, + eventTime=dag_run.start_date.isoformat(), + job=Job(name=dag_run.dag_id, namespace=_DAG_NAMESPACE), + run=self._build_run( + run_id=self.build_dag_run_id(dag_run.dag_id, dag_run.run_id), + nominal_start_time=nominal_start_time, + nominal_end_time=nominal_end_time, + ), + inputs=[], + outputs=[], + producer=_PRODUCER, + ) + self.emit(event) + + def dag_success(self, dag_run: DagRun, msg: str): + event = RunEvent( + eventType=RunState.COMPLETE, + eventTime=dag_run.end_date.isoformat(), + job=Job(name=dag_run.dag_id, namespace=_DAG_NAMESPACE), + run=Run(runId=self.build_dag_run_id(dag_run.dag_id, dag_run.run_id)), + inputs=[], + outputs=[], + producer=_PRODUCER, + ) + self.emit(event) + + def dag_failed(self, dag_run: DagRun, msg: str): + event = RunEvent( + eventType=RunState.FAIL, + eventTime=dag_run.end_date.isoformat(), + job=Job(name=dag_run.dag_id, namespace=_DAG_NAMESPACE), + run=Run( + runId=self.build_dag_run_id(dag_run.dag_id, dag_run.run_id), + facets={"errorMessage": ErrorMessageRunFacet(message=msg, programmingLanguage="python")}, + ), + inputs=[], + outputs=[], + producer=_PRODUCER, + ) + self.emit(event) + + @staticmethod + def _build_run( + run_id: str, + parent_job_name: str | None = None, + parent_run_id: str | None = None, + job_name: str | None = None, + nominal_start_time: str | None = None, + nominal_end_time: str | None = None, + run_facets: dict[str, BaseFacet] | None = None, + ) -> Run: + facets = {} + if nominal_start_time: + facets.update({"nominalTime": NominalTimeRunFacet(nominal_start_time, nominal_end_time)}) + if parent_run_id: + parent_run_facet = ParentRunFacet.create( + runId=parent_run_id, + namespace=_DAG_NAMESPACE, + name=parent_job_name or job_name, + ) + facets.update( + { + "parent": parent_run_facet, + "parentRun": parent_run_facet, # Keep sending this for the backward compatibility + } + ) + + if run_facets: + facets.update(run_facets) + + return Run(run_id, facets) + + @staticmethod + def _build_job( + job_name: str, + job_description: str | None = None, + code_location: str | None = None, + owners: list[str] | None = None, + job_facets: dict[str, BaseFacet] | None = None, + ): + facets = {} + + if job_description: + facets.update({"documentation": DocumentationJobFacet(description=job_description)}) + if code_location: + facets.update({"sourceCodeLocation": SourceCodeLocationJobFacet("", url=code_location)}) + if owners: + facets.update( + { + "ownership": 
OwnershipJobFacet( + owners=[OwnershipJobFacetOwners(name=owner) for owner in owners] + ) + } + ) + if job_facets: + facets = {**facets, **job_facets} + + return Job(_DAG_NAMESPACE, job_name, facets) diff --git a/airflow/providers/openlineage/plugins/facets.py b/airflow/providers/openlineage/plugins/facets.py new file mode 100644 index 0000000000000..a5a41e54a60e1 --- /dev/null +++ b/airflow/providers/openlineage/plugins/facets.py @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from attrs import define + +from openlineage.client.facet import BaseFacet +from openlineage.client.utils import RedactMixin + + +@define(slots=False) +class AirflowMappedTaskRunFacet(BaseFacet): + """Run facet containing information about mapped tasks""" + + mapIndex: int + operatorClass: str + + _additional_skip_redact: list[str] = ["operatorClass"] + + @classmethod + def from_task_instance(cls, task_instance): + task = task_instance.task + from airflow.providers.openlineage.utils import get_operator_class + + return cls( + mapIndex=task_instance.map_index, + operatorClass=f"{get_operator_class(task).__module__}.{get_operator_class(task).__name__}", + ) + + +@define(slots=False) +class AirflowRunFacet(BaseFacet): + """Composite Airflow run facet.""" + + dag: dict + dagRun: dict + task: dict + taskInstance: dict + taskUuid: str + + +@define(slots=False) +class UnknownOperatorInstance(RedactMixin): + """ + Describes an unknown operator - specifies the (class) name of the operator + and its properties + """ + + name: str + properties: dict[str, object] + type: str = "operator" + + _skip_redact: list[str] = ["name", "type"] + + +@define(slots=False) +class UnknownOperatorAttributeRunFacet(BaseFacet): + """RunFacet that describes unknown operators in an Airflow DAG""" + + unknownItems: list[UnknownOperatorInstance] diff --git a/airflow/providers/openlineage/plugins/listener.py b/airflow/providers/openlineage/plugins/listener.py new file mode 100644 index 0000000000000..9b57bf919fb66 --- /dev/null +++ b/airflow/providers/openlineage/plugins/listener.py @@ -0,0 +1,191 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import logging +from concurrent.futures import Executor, ThreadPoolExecutor +from typing import TYPE_CHECKING + +from airflow.listeners import hookimpl +from airflow.providers.openlineage.extractors import ExtractorManager +from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter +from airflow.providers.openlineage.utils.utils import ( + get_airflow_run_facet, + get_custom_facets, + get_job_name, + print_exception, +) +from airflow.utils.timeout import timeout + +if TYPE_CHECKING: + from sqlalchemy.orm import Session + + from airflow.models import DagRun, TaskInstance + + +class OpenLineageListener: + """ + OpenLineage listener + Sends events on task instance and dag run starts, completes and failures. + """ + + def __init__(self): + self.log = logging.getLogger(__name__) + self.executor: Executor = None # type: ignore + self.extractor_manager = ExtractorManager() + self.adapter = OpenLineageAdapter() + + @hookimpl + def on_task_instance_running( + self, previous_state, task_instance: TaskInstance, session: Session # This will always be QUEUED + ): + if not hasattr(task_instance, "task"): + self.log.warning( + f"No task set for TI object task_id: {task_instance.task_id} - " + f"dag_id: {task_instance.dag_id} - run_id {task_instance.run_id}" + ) + return + + self.log.debug("OpenLineage listener got notification about task instance start") + dagrun = task_instance.dag_run + task = task_instance.task + dag = task.dag + + @print_exception + def on_running(): + # that's a workaround to detect task running from deferred state + # we return here because Airflow 2.3 needs task from deferred state + if task_instance.next_method is not None: + return + parent_run_id = self.adapter.build_dag_run_id(dag.dag_id, dagrun.run_id) + + task_uuid = self.adapter.build_task_instance_run_id( + task.task_id, task_instance.execution_date, task_instance.try_number + ) + + task_metadata = self.extractor_manager.extract_metadata(dagrun, task) + + self.adapter.start_task( + run_id=task_uuid, + job_name=get_job_name(task), + job_description=dag.description, + event_time=task_instance.start_date.isoformat(), + parent_job_name=dag.dag_id, + parent_run_id=parent_run_id, + code_location=None, + nominal_start_time=dagrun.data_interval_start.isoformat(), + nominal_end_time=dagrun.data_interval_end.isoformat(), + owners=dag.owner.split(", "), + task=task_metadata, + run_facets={ + **task_metadata.run_facets, + **get_custom_facets(task_instance), + **get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid), + }, + ) + + self.executor.submit(on_running) + + @hookimpl + def on_task_instance_success(self, previous_state, task_instance: TaskInstance, session): + self.log.debug("OpenLineage listener got notification about task instance success") + + dagrun = task_instance.dag_run + task = task_instance.task + + task_uuid = OpenLineageAdapter.build_task_instance_run_id( + task.task_id, task_instance.execution_date, task_instance.try_number - 1 + ) + + @print_exception + def on_success(): + task_metadata = self.extractor_manager.extract_metadata( + dagrun, 
task, complete=True, task_instance=task_instance + ) + self.adapter.complete_task( + run_id=task_uuid, + job_name=get_job_name(task), + end_time=task_instance.end_date.isoformat(), + task=task_metadata, + ) + + self.executor.submit(on_success) + + @hookimpl + def on_task_instance_failed(self, previous_state, task_instance: TaskInstance, session): + self.log.debug("OpenLineage listener got notification about task instance failure") + + dagrun = task_instance.dag_run + task = task_instance.task + + task_uuid = OpenLineageAdapter.build_task_instance_run_id( + task.task_id, task_instance.execution_date, task_instance.try_number - 1 + ) + + @print_exception + def on_failure(): + task_metadata = self.extractor_manager.extract_metadata( + dagrun, task, complete=True, task_instance=task_instance + ) + + self.adapter.fail_task( + run_id=task_uuid, + job_name=get_job_name(task), + end_time=task_instance.end_date.isoformat(), + task=task_metadata, + ) + + self.executor.submit(on_failure) + + @hookimpl + def on_starting(self, component): + self.log.debug("on_starting: %s", component.__class__.__name__) + self.executor = ThreadPoolExecutor(max_workers=8, thread_name_prefix="openlineage_") + + @hookimpl + def before_stopping(self, component): + self.log.debug("before_stopping: %s", component.__class__.__name__) + # TODO: configure this with Airflow config + with timeout(30): + self.executor.shutdown(wait=True) + + @hookimpl + def on_dag_run_running(self, dag_run: DagRun, msg: str): + if not self.executor: + self.log.error("Executor has not started before `on_dag_run_running`") + return + self.executor.submit( + self.adapter.dag_started, + dag_run=dag_run, + msg=msg, + nominal_start_time=dag_run.data_interval_start.isoformat(), + nominal_end_time=dag_run.data_interval_end.isoformat(), + ) + + @hookimpl + def on_dag_run_success(self, dag_run: DagRun, msg: str): + if not self.executor: + self.log.error("Executor has not started before `on_dag_run_success`") + return + self.executor.submit(self.adapter.dag_success, dag_run=dag_run, msg=msg) + + @hookimpl + def on_dag_run_failed(self, dag_run: DagRun, msg: str): + if not self.executor: + self.log.error("Executor has not started before `on_dag_run_failed`") + return + self.executor.submit(self.adapter.dag_failed, dag_run=dag_run, msg=msg) diff --git a/airflow/providers/openlineage/plugins/macros.py b/airflow/providers/openlineage/plugins/macros.py new file mode 100644 index 0000000000000..0e445682a9cf4 --- /dev/null +++ b/airflow/providers/openlineage/plugins/macros.py @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import os +import typing + +from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter + +if typing.TYPE_CHECKING: + from airflow.models import TaskInstance + +_JOB_NAMESPACE = os.getenv("OPENLINEAGE_NAMESPACE", "default") + + +def lineage_run_id(task_instance: TaskInstance): + """ + Macro function which returns the generated run id for a given task. This + can be used to forward the run id from a task to a child run so the job + hierarchy is preserved. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/macros:openlineage` + """ + return OpenLineageAdapter.build_task_instance_run_id( + task_instance.task.task_id, task_instance.execution_date, task_instance.try_number + ) + + +def lineage_parent_id(run_id: str, task_instance: TaskInstance): + """ + Macro function which returns the generated job and run id for a given task. This + can be used to forward the ids from a task to a child run so the job + hierarchy is preserved. Child run can create ParentRunFacet from those ids. + + .. seealso:: + For more information on how to use this macro, take a look at the guide: + :ref:`howto/macros:openlineage` + """ + job_name = OpenLineageAdapter.build_task_instance_run_id( + task_instance.task.task_id, task_instance.execution_date, task_instance.try_number + ) + return f"{_JOB_NAMESPACE}/{job_name}/{run_id}" diff --git a/airflow/providers/openlineage/plugins/openlineage.py b/airflow/providers/openlineage/plugins/openlineage.py new file mode 100644 index 0000000000000..a53c5cf4e1c13 --- /dev/null +++ b/airflow/providers/openlineage/plugins/openlineage.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import os + +from airflow.plugins_manager import AirflowPlugin +from airflow.providers.openlineage.plugins.macros import lineage_parent_id, lineage_run_id + + +def _is_disabled() -> bool: + return os.getenv("OPENLINEAGE_DISABLED", "false").lower() == "true" + + +class OpenLineageProviderPlugin(AirflowPlugin): + """OpenLineage Plugin provides listener that emits OL events on DAG start, complete and failure + and TaskInstances start, complete and failure. 
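+
+    Illustrative usage sketch (editor's note, not part of the original change; it assumes
+    Airflow's usual behaviour of exposing plugin macros under the plugin name in templates):
+
+        from airflow.operators.bash import BashOperator
+
+        BashOperator(
+            task_id="child_job",
+            bash_command="echo parent run id: "
+            "{{ macros.OpenLineageProviderPlugin.lineage_run_id(task_instance) }}",
+        )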
+ """ + + name = "OpenLineageProviderPlugin" + macros = [lineage_run_id, lineage_parent_id] + if _is_disabled(): + from airflow.providers.openlineage.plugins.listener import OpenLineageListener + + listeners = [OpenLineageListener()] diff --git a/airflow/providers/openlineage/provider.yaml b/airflow/providers/openlineage/provider.yaml new file mode 100644 index 0000000000000..b13a551506cbc --- /dev/null +++ b/airflow/providers/openlineage/provider.yaml @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +--- +package-name: apache-airflow-providers-openlineage +name: OpenLineage Airflow +description: | + `OpenLineage `__ + +suspended: false +versions: + - 1.0.0 + +dependencies: + # This provider depends on Airflow features available in later versions, like + # lifecycle notifications https://github.com/apache/airflow/pull/27855 + # or direct calls to Listener API https://github.com/apache/airflow/pull/29289 + - apache-airflow>=2.5.2 + - apache-airflow-providers-common-sql>=1.3.1 + - attrs>=22.2 + - openlineage-integration-common>=0.22.0 + - openlineage-python>=0.22.0 + +integrations: + - integration-name: OpenLineage + external-doc-url: https://openlineage.io + logo: /integration-logos/openlineage/openlineage.svg + tags: [protocol] + +plugins: + - name: openlineage + plugin-class: airflow.providers.openlineage.plugins.openlineage.OpenLineageProviderPlugin diff --git a/airflow/providers/openlineage/utils/__init__.py b/airflow/providers/openlineage/utils/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/airflow/providers/openlineage/utils/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/airflow/providers/openlineage/utils/utils.py b/airflow/providers/openlineage/utils/utils.py new file mode 100644 index 0000000000000..a95c0edb32491 --- /dev/null +++ b/airflow/providers/openlineage/utils/utils.py @@ -0,0 +1,397 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import datetime +import json +import logging +from contextlib import suppress +from functools import wraps +from typing import TYPE_CHECKING, Any +from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse + +import attrs +from attrs import asdict + +from airflow.providers.openlineage.plugins.facets import ( + AirflowMappedTaskRunFacet, + AirflowRunFacet, +) +from airflow.utils.log.secrets_masker import Redactable, Redacted, SecretsMasker, should_hide_value_for_key + +# TODO: move this maybe to Airflow's logic? +from openlineage.client.utils import RedactMixin + +if TYPE_CHECKING: + from airflow.models import DAG, BaseOperator, Connection, DagRun, TaskInstance + + +log = logging.getLogger(__name__) +_NOMINAL_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" + + +def openlineage_job_name(dag_id: str, task_id: str) -> str: + return f"{dag_id}.{task_id}" + + +def get_operator_class(task: BaseOperator) -> type: + if task.__class__.__name__ in ("DecoratedMappedOperator", "MappedOperator"): + return task.operator_class + return task.__class__ + + +def to_json_encodable(task: BaseOperator) -> dict[str, object]: + def _task_encoder(obj): + from airflow.models import DAG + + if isinstance(obj, datetime.datetime): + return obj.isoformat() + elif isinstance(obj, DAG): + return { + "dag_id": obj.dag_id, + "tags": obj.tags, + "schedule_interval": obj.schedule_interval, + "timetable": obj.timetable.serialize(), + } + else: + return str(obj) + + return json.loads(json.dumps(task.__dict__, default=_task_encoder)) + + +def url_to_https(url) -> str | None: + # Ensure URL exists + if not url: + return None + + base_url = None + if url.startswith("git@"): + part = url.split("git@")[1:2] + if part: + base_url = f'https://{part[0].replace(":", "/", 1)}' + elif url.startswith("https://"): + base_url = url + + if not base_url: + raise ValueError(f"Unable to extract location from: {url}") + + if base_url.endswith(".git"): + base_url = base_url[:-4] + return base_url + + +def redacted_connection_uri(conn: Connection, filtered_params=None, filtered_prefixes=None): + """ + Return the connection URI for the given Connection. + This method additionally filters URI by removing query parameters that are known to carry sensitive data + like username, password, access key. 
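+
+    Illustrative example (editor's sketch, not part of the original change): for a connection
+    whose URI is ``postgres://user:pass@host:5432/db?password=s3cret&region=eu``, calling this
+    with ``filtered_prefixes=["password"]`` would strip the userinfo and the ``password`` query
+    parameter, returning roughly ``postgres://host:5432/db?region=eu``.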
+ """ + if filtered_prefixes is None: + filtered_prefixes = [] + if filtered_params is None: + filtered_params = [] + + def filter_key_params(k: str): + return k not in filtered_params and any(substr in k for substr in filtered_prefixes) + + conn_uri = conn.get_uri() + parsed = urlparse(conn_uri) + + # Remove username and password + netloc = f"{parsed.hostname}" + (f":{parsed.port}" if parsed.port else "") + parsed = parsed._replace(netloc=netloc) + if parsed.query: + query_dict = dict(parse_qsl(parsed.query)) + if conn.EXTRA_KEY in query_dict: + query_dict = json.loads(query_dict[conn.EXTRA_KEY]) + filtered_qs = {k: v for k, v in query_dict.items() if not filter_key_params(k)} + parsed = parsed._replace(query=urlencode(filtered_qs)) + return urlunparse(parsed) + + +def get_connection(conn_id) -> Connection | None: + from airflow.hooks.base import BaseHook + + with suppress(Exception): + return BaseHook.get_connection(conn_id=conn_id) + return None + + +def get_job_name(task): + return f"{task.dag_id}.{task.task_id}" + + +def get_custom_facets(task_instance: TaskInstance | None = None) -> dict[str, Any]: + custom_facets = {} + # check for -1 comes from SmartSensor compatibility with dynamic task mapping + # this comes from Airflow code + if hasattr(task_instance, "map_index") and getattr(task_instance, "map_index") != -1: + custom_facets["airflow_mappedTask"] = AirflowMappedTaskRunFacet.from_task_instance(task_instance) + return custom_facets + + +class InfoJsonEncodable(dict): + """ + Airflow objects might not be json-encodable overall. + + The class provides additional attributes to control + what and how is encoded: + + * renames: a dictionary of attribute name changes + * | casts: a dictionary consisting of attribute names + | and corresponding methods that should change + | object value + * includes: list of attributes to be included in encoding + * excludes: list of attributes to be excluded from encoding + + Don't use both includes and excludes. 
+ """ + + renames: dict[str, str] = {} + casts: dict[str, Any] = {} + includes: list[str] = [] + excludes: list[str] = [] + + def __init__(self, obj): + self.obj = obj + self._fields = [] + + self._cast_fields() + self._rename_fields() + self._include_fields() + dict.__init__( + self, + **{field: InfoJsonEncodable._cast_basic_types(getattr(self, field)) for field in self._fields}, + ) + + @staticmethod + def _cast_basic_types(value): + if isinstance(value, datetime.datetime): + return value.isoformat() + if isinstance(value, (set, list, tuple)): + return str(list(value)) + return value + + def _rename_fields(self): + for field, renamed in self.renames.items(): + if hasattr(self.obj, field): + setattr(self, renamed, getattr(self.obj, field)) + self._fields.append(renamed) + + def _cast_fields(self): + for field, func in self.casts.items(): + setattr(self, field, func(self.obj)) + self._fields.append(field) + + def _include_fields(self): + if self.includes and self.excludes: + raise Exception("Don't use both includes and excludes.") + if self.includes: + for field in self.includes: + if field in self._fields or not hasattr(self.obj, field): + continue + setattr(self, field, getattr(self.obj, field)) + self._fields.append(field) + else: + for field, val in self.obj.__dict__.items(): + if field in self._fields or field in self.excludes or field in self.renames: + continue + setattr(self, field, val) + self._fields.append(field) + + +class DagInfo(InfoJsonEncodable): + """Defines encoding DAG object to JSON.""" + + includes = ["dag_id", "schedule_interval", "tags", "start_date"] + casts = {"timetable": lambda dag: dag.timetable.serialize() if getattr(dag, "timetable", None) else None} + renames = {"_dag_id": "dag_id"} + + +class DagRunInfo(InfoJsonEncodable): + """Defines encoding DagRun object to JSON.""" + + includes = [ + "conf", + "dag_id", + "data_interval_start", + "data_interval_end", + "external_trigger", + "run_id", + "run_type", + "start_date", + ] + + +class TaskInstanceInfo(InfoJsonEncodable): + """Defines encoding TaskInstance object to JSON.""" + + includes = ["duration", "try_number", "pool"] + casts = { + "map_index": lambda ti: ti.map_index + if hasattr(ti, "map_index") and getattr(ti, "map_index") != -1 + else None + } + + +class TaskInfo(InfoJsonEncodable): + """Defines encoding BaseOperator/AbstractOperator object to JSON.""" + + renames = { + "_BaseOperator__init_kwargs": "args", + "_BaseOperator__from_mapped": "mapped", + "_downstream_task_ids": "downstream_task_ids", + "_upstream_task_ids": "upstream_task_ids", + } + excludes = [ + "_BaseOperator__instantiated", + "_dag", + "_hook", + "_log", + "_outlets", + "_inlets", + "_lock_for_execution", + "handler", + "params", + "python_callable", + "retry_delay", + ] + casts = { + "operator_class": lambda task: task.task_type, + "task_group": lambda task: TaskGroupInfo(task.task_group) + if hasattr(task, "task_group") and getattr(task.task_group, "_group_id", None) + else None, + } + + +class TaskGroupInfo(InfoJsonEncodable): + """Defines encoding TaskGroup object to JSON.""" + + renames = { + "_group_id": "group_id", + } + includes = [ + "downstream_group_ids", + "downstream_task_ids", + "prefix_group_id", + "tooltip", + "upstream_group_ids", + "upstream_task_ids", + ] + + +def get_airflow_run_facet( + dag_run: DagRun, + dag: DAG, + task_instance: TaskInstance, + task: BaseOperator, + task_uuid: str, +): + return { + "airflow": json.loads( + json.dumps( + asdict( + AirflowRunFacet( + dag=DagInfo(dag), + 
dagRun=DagRunInfo(dag_run), + taskInstance=TaskInstanceInfo(task_instance), + task=TaskInfo(task), + taskUuid=task_uuid, + ) + ), + default=str, + ) + ) + } + + +class OpenLineageRedactor(SecretsMasker): + """This class redacts sensitive data similar to SecretsMasker in Airflow logs. + The difference is that our default max recursion depth is way higher - due to + the structure of OL events we need more depth. + Additionally, we allow data structures to specify data that needs not to be + redacted by specifying _skip_redact list by deriving RedactMixin. + """ + + @classmethod + def from_masker(cls, other: SecretsMasker) -> OpenLineageRedactor: + instance = cls() + instance.patterns = other.patterns + instance.replacer = other.replacer + return instance + + def _redact(self, item: Redactable, name: str | None, depth: int, max_depth: int) -> Redacted: + if depth > max_depth: + return item + try: + if name and should_hide_value_for_key(name): + return self._redact_all(item, depth, max_depth) + if attrs.has(type(item)): + # TODO: fixme when mypy gets compatible with new attrs + for dict_key, subval in attrs.asdict(item, recurse=False).items(): # type: ignore[arg-type] + if _is_name_redactable(dict_key, item): + setattr( + item, + dict_key, + self._redact(subval, name=dict_key, depth=(depth + 1), max_depth=max_depth), + ) + return item + elif is_json_serializable(item) and hasattr(item, "__dict__"): + for dict_key, subval in item.__dict__.items(): + if _is_name_redactable(dict_key, item): + setattr( + item, + dict_key, + self._redact(subval, name=dict_key, depth=(depth + 1), max_depth=max_depth), + ) + return item + else: + return super()._redact(item, name, depth, max_depth) + except Exception as e: + log.warning( + "Unable to redact %s" "Error was: %s: %s", + repr(item), + type(e).__name__, + str(e), + ) + return item + + +def is_json_serializable(item): + try: + json.dumps(item) + return True + except (TypeError, ValueError): + return False + + +def _is_name_redactable(name, redacted): + if not issubclass(redacted.__class__, RedactMixin): + return not name.startswith("_") + return name not in redacted.skip_redact + + +def print_exception(f): + @wraps(f) + def wrapper(*args, **kwargs): + try: + return f(*args, **kwargs) + except Exception as e: + log.exception(e) + + return wrapper diff --git a/airflow/serialization/pydantic/taskinstance.py b/airflow/serialization/pydantic/taskinstance.py index 6398541d0be44..9c87186873580 100644 --- a/airflow/serialization/pydantic/taskinstance.py +++ b/airflow/serialization/pydantic/taskinstance.py @@ -111,4 +111,4 @@ def get_dagrun(self) -> DagRunPydantic: :return: Pydantic serialized version of DaGrun """ - return DagRunPydantic() + raise NotImplementedError() diff --git a/airflow/serialization/serde.py b/airflow/serialization/serde.py index 3429c75f86971..5b195939843aa 100644 --- a/airflow/serialization/serde.py +++ b/airflow/serialization/serde.py @@ -157,7 +157,7 @@ def serialize(o: object, depth: int = 0) -> U | None: # dataclasses if dataclasses.is_dataclass(cls): # fixme: unfortunately using asdict with nested dataclasses it looses information - data = dataclasses.asdict(o) + data = dataclasses.asdict(o) # type: ignore[call-overload] dct[DATA] = serialize(data, depth + 1) return dct diff --git a/airflow/stats.py b/airflow/stats.py index 8785569682a30..da31e7d913eb9 100644 --- a/airflow/stats.py +++ b/airflow/stats.py @@ -24,7 +24,7 @@ import string import time from functools import partial, wraps -from typing import TYPE_CHECKING, Callable, 
Iterable, TypeVar, cast +from typing import TYPE_CHECKING, Callable, Iterable, TypeVar, Union, cast from airflow.configuration import conf from airflow.exceptions import AirflowConfigException, InvalidStatsNameException @@ -35,6 +35,7 @@ from statsd import StatsClient log = logging.getLogger(__name__) +DeltaType = Union[int, float, datetime.timedelta] class TimerProtocol(Protocol): @@ -96,7 +97,7 @@ def gauge( def timing( cls, stat: str, - dt: int | float | datetime.timedelta, + dt: DeltaType | None, *, tags: dict[str, str] | None = None, ) -> None: @@ -333,9 +334,7 @@ def gauge( """Gauge stat.""" @classmethod - def timing( - cls, stat: str, dt: int | float | datetime.timedelta, *, tags: dict[str, str] | None = None - ) -> None: + def timing(cls, stat: str, dt: DeltaType, *, tags: dict[str, str] | None = None) -> None: """Stats timing.""" @classmethod @@ -410,7 +409,7 @@ def gauge( def timing( self, stat: str, - dt: int | float | datetime.timedelta, + dt: DeltaType, *, tags: dict[str, str] | None = None, ) -> None: @@ -514,7 +513,7 @@ def gauge( def timing( self, stat: str, - dt: int | float | datetime.timedelta, + dt: DeltaType, *, tags: dict[str, str] | None = None, ) -> None: diff --git a/airflow/ti_deps/deps/task_concurrency_dep.py b/airflow/ti_deps/deps/task_concurrency_dep.py index 5b5f4f515acf0..1f1416214c7a4 100644 --- a/airflow/ti_deps/deps/task_concurrency_dep.py +++ b/airflow/ti_deps/deps/task_concurrency_dep.py @@ -30,13 +30,22 @@ class TaskConcurrencyDep(BaseTIDep): @provide_session def _get_dep_statuses(self, ti, session, dep_context): - if ti.task.max_active_tis_per_dag is None: + if ti.task.max_active_tis_per_dag is None and ti.task.max_active_tis_per_dagrun is None: yield self._passing_status(reason="Task concurrency is not set.") return - if ti.get_num_running_task_instances(session) >= ti.task.max_active_tis_per_dag: + if ( + ti.task.max_active_tis_per_dag is not None + and ti.get_num_running_task_instances(session) >= ti.task.max_active_tis_per_dag + ): yield self._failing_status(reason="The max task concurrency has been reached.") return - else: - yield self._passing_status(reason="The max task concurrency has not been reached.") + if ( + ti.task.max_active_tis_per_dagrun is not None + and ti.get_num_running_task_instances(session, same_dagrun=True) + >= ti.task.max_active_tis_per_dagrun + ): + yield self._failing_status(reason="The max task concurrency per run has been reached.") return + yield self._passing_status(reason="The max task concurrency has not been reached.") + return diff --git a/airflow/triggers/external_task.py b/airflow/triggers/external_task.py index 5ed0d3b5e3f92..883753401c58f 100644 --- a/airflow/triggers/external_task.py +++ b/airflow/triggers/external_task.py @@ -144,7 +144,7 @@ async def run(self) -> typing.AsyncIterator["TriggerEvent"]: while True: num_dags = await self.count_dags() if num_dags == len(self.execution_dates): - yield TriggerEvent(True) + yield TriggerEvent(self.serialize()) await asyncio.sleep(self.poll_interval) @sync_to_async diff --git a/airflow/utils/file.py b/airflow/utils/file.py index b3b1e8f3d187f..81089e06d4c26 100644 --- a/airflow/utils/file.py +++ b/airflow/utils/file.py @@ -17,6 +17,7 @@ # under the License. 
from __future__ import annotations +import ast import io import logging import os @@ -371,3 +372,23 @@ def might_contain_dag_via_default_heuristic(file_path: str, zip_file: zipfile.Zi content = dag_file.read() content = content.lower() return all(s in content for s in (b"dag", b"airflow")) + + +def _find_imported_modules(module: ast.Module) -> Generator[str, None, None]: + for st in module.body: + if isinstance(st, ast.Import): + for n in st.names: + yield n.name + elif isinstance(st, ast.ImportFrom) and st.module is not None: + yield st.module + + +def iter_airflow_imports(file_path: str) -> Generator[str, None, None]: + """Find Airflow modules imported in the given file.""" + try: + parsed = ast.parse(Path(file_path).read_bytes()) + except (OSError, SyntaxError, UnicodeDecodeError): + return + for m in _find_imported_modules(parsed): + if m.startswith("airflow."): + yield m diff --git a/airflow/utils/hashlib_wrapper.py b/airflow/utils/hashlib_wrapper.py new file mode 100644 index 0000000000000..2415f3d00e94e --- /dev/null +++ b/airflow/utils/hashlib_wrapper.py @@ -0,0 +1,37 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import hashlib + +from airflow import PY39 + + +def md5(data: bytes, *, usedforsecurity: bool | None = None): + """ + Safely allows calling the hashlib.md5 function with the "usedforsecurity" param. + :param data: The data to hash. + :param usedforsecurity: The value to pass to the md5 function's "usedforsecurity" param. + Defaults to None. + :return: The hashed value. + :rtype: _Hash + """ + if PY39 and usedforsecurity is not None: + return hashlib.md5(data, usedforsecurity=usedforsecurity) # type: ignore + else: + return hashlib.md5(data) diff --git a/airflow/utils/task_group.py b/airflow/utils/task_group.py index 9b94a559522db..7e59a1bce7eb5 100644 --- a/airflow/utils/task_group.py +++ b/airflow/utils/task_group.py @@ -244,7 +244,8 @@ def _remove(self, task: DAGNode) -> None: @property def group_id(self) -> str | None: """group_id of this TaskGroup.""" - if self.task_group and self.task_group.prefix_group_id and self.task_group.group_id: + if self.task_group and self.task_group.prefix_group_id and self.task_group._group_id: + # defer to parent whether it adds a prefix return self.task_group.child_id(self._group_id) return self._group_id @@ -368,8 +369,10 @@ def child_id(self, label): Prefix label with group_id if prefix_group_id is True. Otherwise return the label as-is. 
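
        Illustrative example (editor's note, not part of the original change): for a task with
        label ``"extract"`` inside a TaskGroup whose resolved ``group_id`` is ``"section_1"``
        and with ``prefix_group_id=True``, ``child_id("extract")`` returns ``"section_1.extract"``.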
""" - if self.prefix_group_id and self.group_id: - return f"{self.group_id}.{label}" + if self.prefix_group_id: + group_id = self.group_id + if group_id: + return f"{group_id}.{label}" return label diff --git a/airflow/www/app.py b/airflow/www/app.py index 5283718089100..246381f003e4e 100644 --- a/airflow/www/app.py +++ b/airflow/www/app.py @@ -19,11 +19,9 @@ import warnings from datetime import timedelta -from tempfile import gettempdir from flask import Flask from flask_appbuilder import SQLA -from flask_caching import Cache from flask_wtf.csrf import CSRFProtect from markupsafe import Markup from sqlalchemy.engine.url import make_url @@ -38,6 +36,7 @@ from airflow.utils.json import AirflowJsonProvider from airflow.www.extensions.init_appbuilder import init_appbuilder from airflow.www.extensions.init_appbuilder_links import init_appbuilder_links +from airflow.www.extensions.init_cache import init_cache from airflow.www.extensions.init_dagbag import init_dagbag from airflow.www.extensions.init_jinja_globals import init_jinja_globals from airflow.www.extensions.init_manifest_files import configure_manifest_files @@ -143,8 +142,7 @@ def create_app(config=None, testing=False): init_robots(flask_app) - cache_config = {"CACHE_TYPE": "flask_caching.backends.filesystem", "CACHE_DIR": gettempdir()} - Cache(app=flask_app, config=cache_config) + init_cache(flask_app) init_flash_views(flask_app) diff --git a/airflow/www/extensions/init_cache.py b/airflow/www/extensions/init_cache.py new file mode 100644 index 0000000000000..84d952dd7120e --- /dev/null +++ b/airflow/www/extensions/init_cache.py @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import hashlib +from tempfile import gettempdir + +from flask_caching import Cache + +from airflow.configuration import conf +from airflow.exceptions import AirflowConfigException + +HASH_METHOD_MAPPING = { + "md5": hashlib.md5, + "sha1": hashlib.sha1, + "sha224": hashlib.sha224, + "sha256": hashlib.sha256, + "sha384": hashlib.sha384, + "sha512": hashlib.sha512, +} + + +def init_cache(app): + webserver_caching_hash_method = conf.get( + section="webserver", key="CACHING_HASH_METHOD", fallback="md5" + ).casefold() + cache_config = {"CACHE_TYPE": "flask_caching.backends.filesystem", "CACHE_DIR": gettempdir()} + + mapped_hash_method = HASH_METHOD_MAPPING.get(webserver_caching_hash_method) + + if mapped_hash_method is None: + raise AirflowConfigException( + f"Unsupported webserver caching hash method: `{webserver_caching_hash_method}`." 
+ ) + + cache_config["CACHE_OPTIONS"] = {"hash_method": mapped_hash_method} + + Cache(app=app, config=cache_config) diff --git a/airflow/www/package.json b/airflow/www/package.json index 8010f19add029..77f0df0265fcf 100644 --- a/airflow/www/package.json +++ b/airflow/www/package.json @@ -100,7 +100,7 @@ "codemirror": "^5.59.1", "color": "^4.2.3", "d3": "^3.4.4", - "d3-7": "npm:d3@7", + "d3-selection": "^3.0.0", "d3-shape": "^2.1.0", "d3-tip": "^0.9.1", "dagre-d3": "^0.6.4", diff --git a/airflow/www/static/js/api/index.ts b/airflow/www/static/js/api/index.ts index 2bbfe10b66e78..018f4e97b6068 100644 --- a/airflow/www/static/js/api/index.ts +++ b/airflow/www/static/js/api/index.ts @@ -28,7 +28,7 @@ import useClearTask from "./useClearTask"; import useMarkFailedTask from "./useMarkFailedTask"; import useMarkSuccessTask from "./useMarkSuccessTask"; import useExtraLinks from "./useExtraLinks"; -import useConfirmMarkTask from "./useConfirmMarkTask"; +import useMarkTaskDryRun from "./useMarkTaskDryRun"; import useGraphData from "./useGraphData"; import useGridData from "./useGridData"; import useMappedInstances from "./useMappedInstances"; @@ -50,7 +50,7 @@ axios.defaults.headers.common.Accept = "application/json"; export { useClearRun, useClearTask, - useConfirmMarkTask, + useMarkTaskDryRun, useDataset, useDatasetDependencies, useDatasetEvents, diff --git a/airflow/www/static/js/api/useClearTask.ts b/airflow/www/static/js/api/useClearTask.ts index ebe3b14b28b73..b4f80e5a7bf27 100644 --- a/airflow/www/static/js/api/useClearTask.ts +++ b/airflow/www/static/js/api/useClearTask.ts @@ -63,7 +63,7 @@ export default function useClearTask({ recursive: boolean; failed: boolean; confirmed: boolean; - mapIndexes: number[]; + mapIndexes?: number[]; }) => { const params = new URLSearchParamsWrapper({ csrf_token: csrfToken, @@ -105,10 +105,13 @@ export default function useClearTask({ runId, taskId, ]); + queryClient.invalidateQueries(["clearTask", dagId, runId, taskId]); startRefresh(); } }, - onError: (error: Error) => errorToast({ error }), + onError: (error: Error, { confirmed }) => { + if (confirmed) errorToast({ error }); + }, } ); } diff --git a/airflow/www/static/js/api/useClearTaskDryRun.ts b/airflow/www/static/js/api/useClearTaskDryRun.ts new file mode 100644 index 0000000000000..7f068a001f65b --- /dev/null +++ b/airflow/www/static/js/api/useClearTaskDryRun.ts @@ -0,0 +1,107 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import axios, { AxiosResponse } from "axios"; +import { useQuery } from "react-query"; +import type { MinimalTaskInstance } from "src/types"; +import URLSearchParamsWrapper from "src/utils/URLSearchParamWrapper"; +import { getMetaValue } from "../utils"; + +const csrfToken = getMetaValue("csrf_token"); +const clearUrl = getMetaValue("clear_url"); + +const useClearTaskDryRun = ({ + dagId, + runId, + taskId, + executionDate, + isGroup, + past, + future, + upstream, + downstream, + recursive, + failed, + mapIndexes = [], +}: { + dagId: string; + runId: string; + taskId: string; + executionDate: string; + isGroup: boolean; + past: boolean; + future: boolean; + upstream: boolean; + downstream: boolean; + recursive: boolean; + failed: boolean; + mapIndexes?: number[]; +}) => + useQuery( + [ + "clearTask", + dagId, + runId, + taskId, + mapIndexes, + past, + future, + upstream, + downstream, + recursive, + failed, + ], + () => { + const params = new URLSearchParamsWrapper({ + csrf_token: csrfToken, + dag_id: dagId, + dag_run_id: runId, + confirmed: false, + execution_date: executionDate, + past, + future, + upstream, + downstream, + recursive, + only_failed: failed, + }); + + if (isGroup) { + params.append("group_id", taskId); + } else { + params.append("task_id", taskId); + } + + mapIndexes.forEach((mi: number) => { + params.append("map_index", mi.toString()); + }); + + return axios.post( + clearUrl, + params.toString(), + { + headers: { + "Content-Type": "application/x-www-form-urlencoded", + }, + } + ); + } + ); + +export default useClearTaskDryRun; diff --git a/airflow/www/static/js/api/useMarkFailedTask.ts b/airflow/www/static/js/api/useMarkFailedTask.ts index 23bbda60ab985..c74c86e63231f 100644 --- a/airflow/www/static/js/api/useMarkFailedTask.ts +++ b/airflow/www/static/js/api/useMarkFailedTask.ts @@ -52,7 +52,7 @@ export default function useMarkFailedTask({ future: boolean; upstream: boolean; downstream: boolean; - mapIndexes: number[]; + mapIndexes?: number[]; }) => { const params = new URLSearchParamsWrapper({ csrf_token: csrfToken, @@ -85,6 +85,12 @@ export default function useMarkFailedTask({ runId, taskId, ]); + queryClient.invalidateQueries([ + "confirmStateChange", + dagId, + runId, + taskId, + ]); startRefresh(); }, onError: (error: Error) => errorToast({ error }), diff --git a/airflow/www/static/js/api/useMarkSuccessTask.ts b/airflow/www/static/js/api/useMarkSuccessTask.ts index 2605a92526bb7..4c18aa9dc24f9 100644 --- a/airflow/www/static/js/api/useMarkSuccessTask.ts +++ b/airflow/www/static/js/api/useMarkSuccessTask.ts @@ -52,7 +52,7 @@ export default function useMarkSuccessTask({ future: boolean; upstream: boolean; downstream: boolean; - mapIndexes: number[]; + mapIndexes?: number[]; }) => { const params = new URLSearchParamsWrapper({ csrf_token: csrfToken, @@ -85,6 +85,12 @@ export default function useMarkSuccessTask({ runId, taskId, ]); + queryClient.invalidateQueries([ + "confirmStateChange", + dagId, + runId, + taskId, + ]); startRefresh(); }, onError: (error: Error) => errorToast({ error }), diff --git a/airflow/www/static/js/api/useConfirmMarkTask.ts b/airflow/www/static/js/api/useMarkTaskDryRun.ts similarity index 70% rename from airflow/www/static/js/api/useConfirmMarkTask.ts rename to airflow/www/static/js/api/useMarkTaskDryRun.ts index 4e69875ffb650..31bc278644ad9 100644 --- a/airflow/www/static/js/api/useConfirmMarkTask.ts +++ b/airflow/www/static/js/api/useMarkTaskDryRun.ts @@ -18,41 +18,48 @@ */ import axios, { AxiosResponse } from "axios"; -import { 
useMutation } from "react-query"; -import type { TaskState } from "src/types"; +import { useQuery } from "react-query"; +import type { TaskState, MinimalTaskInstance } from "src/types"; import URLSearchParamsWrapper from "src/utils/URLSearchParamWrapper"; import { getMetaValue } from "../utils"; -import useErrorToast from "../utils/useErrorToast"; const confirmUrl = getMetaValue("confirm_url"); -export default function useConfirmMarkTask({ +const useMarkTaskDryRun = ({ dagId, runId, taskId, state, + past, + future, + upstream, + downstream, + mapIndexes = [], }: { dagId: string; runId: string; taskId: string; state: TaskState; -}) { - const errorToast = useErrorToast(); - return useMutation( - ["confirmStateChange", dagId, runId, taskId, state], - ({ + past: boolean; + future: boolean; + upstream: boolean; + downstream: boolean; + mapIndexes?: number[]; +}) => + useQuery( + [ + "confirmStateChange", + dagId, + runId, + taskId, + state, past, future, upstream, downstream, - mapIndexes = [], - }: { - past: boolean; - future: boolean; - upstream: boolean; - downstream: boolean; - mapIndexes: number[]; - }) => { + mapIndexes, + ], + () => { const params = new URLSearchParamsWrapper({ dag_id: dagId, dag_run_id: runId, @@ -67,10 +74,10 @@ export default function useConfirmMarkTask({ mapIndexes.forEach((mi: number) => { params.append("map_index", mi.toString()); }); - return axios.get(confirmUrl, { params }); - }, - { - onError: (error: Error) => errorToast({ error }), + return axios.get(confirmUrl, { + params, + }); } ); -} + +export default useMarkTaskDryRun; diff --git a/airflow/www/static/js/components/ConfirmDialog.tsx b/airflow/www/static/js/components/ConfirmDialog.tsx deleted file mode 100644 index 4826f11ff5cfe..0000000000000 --- a/airflow/www/static/js/components/ConfirmDialog.tsx +++ /dev/null @@ -1,103 +0,0 @@ -/*! - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -import React, { PropsWithChildren, useRef } from "react"; -import { - AlertDialog, - AlertDialogBody, - AlertDialogFooter, - AlertDialogHeader, - AlertDialogContent, - AlertDialogOverlay, - Button, - Code, - Text, -} from "@chakra-ui/react"; - -import { useContainerRef } from "src/context/containerRef"; - -interface Props extends PropsWithChildren { - isOpen: boolean; - onClose: () => void; - title?: string; - description: string; - affectedTasks: string[]; - onConfirm: () => void; - isLoading?: boolean; -} - -const ConfirmDialog = ({ - isOpen, - onClose, - title = "Wait a minute", - description, - affectedTasks, - onConfirm, - isLoading = false, - children, -}: Props) => { - const initialFocusRef = useRef(null); - const containerRef = useContainerRef(); - - return ( - - - - - {title} - - - - {children} - {description} - {affectedTasks.map((ti) => ( - - {ti} - - ))} - {!affectedTasks.length && No task instances to change.} - - - - - - - - - - ); -}; - -export default ConfirmDialog; diff --git a/airflow/www/static/js/dag/details/dagRun/ClearRun.tsx b/airflow/www/static/js/dag/details/dagRun/ClearRun.tsx index d943bbeb13159..dbc369799b14b 100644 --- a/airflow/www/static/js/dag/details/dagRun/ClearRun.tsx +++ b/airflow/www/static/js/dag/details/dagRun/ClearRun.tsx @@ -17,51 +17,68 @@ * under the License. */ -import React, { useState } from "react"; -import { Button, useDisclosure } from "@chakra-ui/react"; - -import { useClearRun } from "src/api"; +import React from "react"; +import { + Flex, + Button, + Menu, + MenuButton, + MenuItem, + MenuList, + MenuButtonProps, +} from "@chakra-ui/react"; +import { MdArrowDropDown } from "react-icons/md"; import { getMetaValue } from "src/utils"; -import ConfirmDialog from "src/components/ConfirmDialog"; +import { useClearRun, useQueueRun } from "src/api"; const canEdit = getMetaValue("can_edit") === "True"; +const dagId = getMetaValue("dag_id"); -interface Props { - dagId: string; +interface Props extends MenuButtonProps { runId: string; } -const ClearRun = ({ dagId, runId }: Props) => { - const [affectedTasks, setAffectedTasks] = useState([]); - const { isOpen, onOpen, onClose } = useDisclosure(); - const { mutateAsync: onClear, isLoading } = useClearRun(dagId, runId); +const ClearRun = ({ runId, ...otherProps }: Props) => { + const { mutateAsync: onClear, isLoading: isClearLoading } = useClearRun( + dagId, + runId + ); + + const { mutateAsync: onQueue, isLoading: isQueueLoading } = useQueueRun( + dagId, + runId + ); - const onClick = async () => { - const data = await onClear({ confirmed: false }); - setAffectedTasks(data); - onOpen(); + const clearExistingTasks = () => { + onClear({ confirmed: true }); }; - const onConfirm = async () => { - await onClear({ confirmed: true }); - setAffectedTasks([]); - onClose(); + const queueNewTasks = () => { + onQueue({ confirmed: true }); }; + const clearLabel = "Clear tasks or add new tasks"; return ( - <> - - - + + + + Clear + + + + + Clear existing tasks + Queue up new tasks + + ); }; diff --git a/airflow/www/static/js/dag/details/dagRun/MarkFailedRun.tsx b/airflow/www/static/js/dag/details/dagRun/MarkFailedRun.tsx deleted file mode 100644 index cd403de69f061..0000000000000 --- a/airflow/www/static/js/dag/details/dagRun/MarkFailedRun.tsx +++ /dev/null @@ -1,73 +0,0 @@ -/*! - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -import React, { useState } from "react"; -import { Button, useDisclosure } from "@chakra-ui/react"; - -import { useMarkFailedRun } from "src/api"; -import { getMetaValue } from "src/utils"; -import ConfirmDialog from "src/components/ConfirmDialog"; - -const canEdit = getMetaValue("can_edit") === "True"; - -interface Props { - dagId: string; - runId: string; -} - -const MarkFailedRun = ({ dagId, runId }: Props) => { - const [affectedTasks, setAffectedTasks] = useState([]); - const { isOpen, onOpen, onClose } = useDisclosure(); - const { mutateAsync: markFailed, isLoading } = useMarkFailedRun(dagId, runId); - - const onClick = async () => { - const data = await markFailed({ confirmed: false }); - setAffectedTasks(data); - onOpen(); - }; - - const onConfirm = () => { - markFailed({ confirmed: true }); - setAffectedTasks([]); - onClose(); - }; - - return ( - <> - - - - ); -}; - -export default MarkFailedRun; diff --git a/airflow/www/static/js/dag/details/dagRun/MarkRunAs.tsx b/airflow/www/static/js/dag/details/dagRun/MarkRunAs.tsx new file mode 100644 index 0000000000000..276e9a6f056be --- /dev/null +++ b/airflow/www/static/js/dag/details/dagRun/MarkRunAs.tsx @@ -0,0 +1,90 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import React from "react"; +import { + Flex, + Button, + Menu, + MenuButton, + MenuItem, + MenuList, + MenuButtonProps, +} from "@chakra-ui/react"; +import { MdArrowDropDown } from "react-icons/md"; +import { getMetaValue } from "src/utils"; +import { useMarkFailedRun, useMarkSuccessRun } from "src/api"; +import type { RunState } from "src/types"; + +import { SimpleStatus } from "../../StatusBox"; + +const canEdit = getMetaValue("can_edit") === "True"; +const dagId = getMetaValue("dag_id"); + +interface Props extends MenuButtonProps { + runId: string; + state?: RunState; +} + +const MarkRunAs = ({ runId, state, ...otherProps }: Props) => { + const { mutateAsync: markFailed, isLoading: isMarkFailedLoading } = + useMarkFailedRun(dagId, runId); + const { mutateAsync: markSuccess, isLoading: isMarkSuccessLoading } = + useMarkSuccessRun(dagId, runId); + + const markAsFailed = () => { + markFailed({ confirmed: true }); + }; + + const markAsSuccess = () => { + markSuccess({ confirmed: true }); + }; + + const markLabel = "Manually set dag run state"; + return ( + + + + Mark state as... + + + + + + + failed + + + + success + + + + ); +}; + +export default MarkRunAs; diff --git a/airflow/www/static/js/dag/details/dagRun/MarkSuccessRun.tsx b/airflow/www/static/js/dag/details/dagRun/MarkSuccessRun.tsx deleted file mode 100644 index 17b1f679b719f..0000000000000 --- a/airflow/www/static/js/dag/details/dagRun/MarkSuccessRun.tsx +++ /dev/null @@ -1,77 +0,0 @@ -/*! - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -import React, { useState } from "react"; -import { Button, useDisclosure } from "@chakra-ui/react"; - -import { useMarkSuccessRun } from "src/api"; -import ConfirmDialog from "src/components/ConfirmDialog"; -import { getMetaValue } from "src/utils"; - -const canEdit = getMetaValue("can_edit") === "True"; - -interface Props { - dagId: string; - runId: string; -} - -const MarkSuccessRun = ({ dagId, runId }: Props) => { - const [affectedTasks, setAffectedTasks] = useState([]); - const { isOpen, onOpen, onClose } = useDisclosure(); - const { mutateAsync: markSuccess, isLoading } = useMarkSuccessRun( - dagId, - runId - ); - - const onClick = async () => { - const data = await markSuccess({ confirmed: false }); - setAffectedTasks(data); - onOpen(); - }; - - const onConfirm = async () => { - await markSuccess({ confirmed: true }); - setAffectedTasks([]); - onClose(); - }; - - return ( - <> - - - - ); -}; - -export default MarkSuccessRun; diff --git a/airflow/www/static/js/dag/details/dagRun/QueueRun.tsx b/airflow/www/static/js/dag/details/dagRun/QueueRun.tsx deleted file mode 100644 index 8e8a054c40118..0000000000000 --- a/airflow/www/static/js/dag/details/dagRun/QueueRun.tsx +++ /dev/null @@ -1,76 +0,0 @@ -/*! 
- * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -import React, { useState } from "react"; -import { Button, useDisclosure } from "@chakra-ui/react"; - -import { useQueueRun } from "src/api"; -import ConfirmDialog from "src/components/ConfirmDialog"; -import { getMetaValue } from "src/utils"; - -const canEdit = getMetaValue("can_edit") === "True"; - -interface Props { - dagId: string; - runId: string; -} - -const QueueRun = ({ dagId, runId }: Props) => { - const [affectedTasks, setAffectedTasks] = useState([]); - const { isOpen, onOpen, onClose } = useDisclosure(); - const { mutateAsync: onQueue, isLoading } = useQueueRun(dagId, runId); - - // Get what the changes will be and show it in a modal - const onClick = async () => { - const data = await onQueue({ confirmed: false }); - setAffectedTasks(data); - onOpen(); - }; - - // Confirm changes - const onConfirm = async () => { - await onQueue({ confirmed: true }); - setAffectedTasks([]); - onClose(); - }; - - return ( - <> - - - - ); -}; - -export default QueueRun; diff --git a/airflow/www/static/js/dag/details/dagRun/index.tsx b/airflow/www/static/js/dag/details/dagRun/index.tsx index 289da0dc646d6..ed9dce266ff97 100644 --- a/airflow/www/static/js/dag/details/dagRun/index.tsx +++ b/airflow/www/static/js/dag/details/dagRun/index.tsx @@ -19,7 +19,6 @@ import React, { useRef } from "react"; import { Flex, - Text, Box, Button, Divider, @@ -43,10 +42,6 @@ import Time from "src/components/Time"; import RunTypeIcon from "src/components/RunTypeIcon"; import NotesAccordion from "src/dag/details/NotesAccordion"; -import MarkFailedRun from "./MarkFailedRun"; -import MarkSuccessRun from "./MarkSuccessRun"; -import QueueRun from "./QueueRun"; -import ClearRun from "./ClearRun"; import DatasetTriggerEvents from "./DatasetTriggerEvents"; const dagId = getMetaValue("dag_id"); @@ -94,20 +89,6 @@ const DagRun = ({ runId }: Props) => { overflowY="auto" pb={4} > - - - - - - - - Re-run: - - - - - - {
- {taskId && runId && } + + {runId && !taskId && ( + <> + + + + )} + {runId && taskId && ( + <> + + {!isGroup && ( + + )} + + )} + {taskId && runId && } + { const taskInstanceRef = useRef(null); const offsetTop = useOffsetTop(taskInstanceRef); const isMapIndexDefined = !(mapIndex === undefined); - const actionsMapIndexes = isMapIndexDefined ? [mapIndex] : []; const { data: { dagRuns, groups }, } = useGridData(); @@ -74,13 +72,6 @@ const TaskInstance = ({ taskId, runId, mapIndex }: Props) => { const { executionDate } = run; - let taskActionsTitle = `${isGroup ? "Task Group" : "Task"} Actions`; - if (isMapped) { - taskActionsTitle += ` for ${actionsMapIndexes.length || "all"} mapped task${ - actionsMapIndexes.length !== 1 ? "s" : "" - }`; - } - return ( { key={dagId + runId + taskId + instance.mapIndex} /> )} - - - {!isMapped && group.extraLinks && ( }; const ActionButton = ({ name, ...rest }: Props) => ( ); diff --git a/airflow/www/static/js/dag/details/taskInstance/taskActions/ActionModal.tsx b/airflow/www/static/js/dag/details/taskInstance/taskActions/ActionModal.tsx new file mode 100644 index 0000000000000..57128bacd13aa --- /dev/null +++ b/airflow/www/static/js/dag/details/taskInstance/taskActions/ActionModal.tsx @@ -0,0 +1,130 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import React, { ReactNode } from "react"; +import { + Button, + Modal, + ModalBody, + ModalCloseButton, + ModalContent, + ModalFooter, + ModalHeader, + ModalOverlay, + ModalProps, + Box, + Text, + Accordion, + AccordionButton, + AccordionPanel, + AccordionItem, + AccordionIcon, +} from "@chakra-ui/react"; + +import { useContainerRef } from "src/context/containerRef"; +import { Table } from "src/components/Table"; +import type { MinimalTaskInstance } from "src/types"; + +interface Props extends ModalProps { + affectedTasks?: MinimalTaskInstance[]; + header: ReactNode | string; + subheader?: ReactNode | string; + submitButton: ReactNode; +} + +const columns = [ + { + Header: "Task name", + accessor: "taskId", + }, + { + Header: "Map Index", + accessor: "mapIndex", + }, + { + Header: "Run Id", + accessor: "runId", + }, +]; + +const AffectedTasksTable = ({ + affectedTasks, +}: { + affectedTasks: MinimalTaskInstance[]; +}) => ; + +const ActionModal = ({ + isOpen, + onClose, + children, + header, + subheader, + affectedTasks = [], + submitButton, + ...otherProps +}: Props) => { + const containerRef = useContainerRef(); + return ( + + + + {header} + + + {subheader} + + {children} + + + + + + Affected Tasks: {affectedTasks?.length || 0} + + + + + + + + + + + + + + + + {submitButton} + + + + ); +}; + +export default ActionModal; diff --git a/airflow/www/static/js/dag/details/taskInstance/taskActions/Clear.tsx b/airflow/www/static/js/dag/details/taskInstance/taskActions/Clear.tsx deleted file mode 100644 index e42f419014e20..0000000000000 --- a/airflow/www/static/js/dag/details/taskInstance/taskActions/Clear.tsx +++ /dev/null @@ -1,174 +0,0 @@ -/*! - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -import React, { useState } from "react"; -import { - Button, - Flex, - ButtonGroup, - useDisclosure, - Alert, - AlertIcon, -} from "@chakra-ui/react"; - -import ConfirmDialog from "src/components/ConfirmDialog"; -import { useClearTask } from "src/api"; -import { getMetaValue } from "src/utils"; - -import ActionButton from "./ActionButton"; -import type { CommonActionProps } from "./types"; - -const canEdit = getMetaValue("can_edit") === "True"; - -const Run = ({ - dagId, - runId, - taskId, - executionDate, - mapIndexes, - isGroup, -}: CommonActionProps) => { - const [affectedTasks, setAffectedTasks] = useState([]); - - // Options check/unchecked - const [past, setPast] = useState(false); - const onTogglePast = () => setPast(!past); - - const [future, setFuture] = useState(false); - const onToggleFuture = () => setFuture(!future); - - const [upstream, setUpstream] = useState(false); - const onToggleUpstream = () => setUpstream(!upstream); - - const [downstream, setDownstream] = useState(true); - const onToggleDownstream = () => setDownstream(!downstream); - - const [recursive, setRecursive] = useState(true); - const onToggleRecursive = () => setRecursive(!recursive); - - const [failed, setFailed] = useState(false); - const onToggleFailed = () => setFailed(!failed); - - // Confirm dialog open/close - const { isOpen, onOpen, onClose } = useDisclosure(); - - const { mutateAsync: clearTask, isLoading } = useClearTask({ - dagId, - runId, - taskId, - executionDate, - isGroup: !!isGroup, - }); - - const onClick = async () => { - const data = await clearTask({ - past, - future, - upstream, - downstream, - recursive, - failed, - confirmed: false, - mapIndexes, - }); - setAffectedTasks(data); - onOpen(); - }; - - const onConfirm = async () => { - await clearTask({ - past, - future, - upstream, - downstream, - recursive, - failed, - confirmed: true, - mapIndexes, - }); - setAffectedTasks([]); - onClose(); - }; - - return ( - - - - - - - - - - - - {isGroup && (past || future) && ( - - - Clearing a TaskGroup in the future and/or past will affect all the - tasks of this group across multiple dag runs. -
- This can take a while to complete. -
- )} -
-
- ); -}; - -export default Run; diff --git a/airflow/www/static/js/dag/details/taskInstance/taskActions/ClearInstance.tsx b/airflow/www/static/js/dag/details/taskInstance/taskActions/ClearInstance.tsx new file mode 100644 index 0000000000000..d16856255f011 --- /dev/null +++ b/airflow/www/static/js/dag/details/taskInstance/taskActions/ClearInstance.tsx @@ -0,0 +1,235 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import React, { useState } from "react"; +import { + Alert, + AlertIcon, + Box, + Button, + ButtonGroup, + ButtonProps, + Text, + useDisclosure, +} from "@chakra-ui/react"; + +import { getMetaValue } from "src/utils"; +import { useClearTask } from "src/api"; +import useClearTaskDryRun from "src/api/useClearTaskDryRun"; + +import ActionButton from "./ActionButton"; +import ActionModal from "./ActionModal"; + +const canEdit = getMetaValue("can_edit") === "True"; +const dagId = getMetaValue("dag_id"); + +interface Props extends ButtonProps { + runId: string; + taskId: string; + executionDate: string; + isGroup?: boolean; + isMapped?: boolean; + mapIndex?: number; +} + +const ClearInstance = ({ + runId, + taskId, + mapIndex, + executionDate, + isGroup, + isMapped, + ...otherProps +}: Props) => { + const { onOpen, onClose, isOpen } = useDisclosure(); + + const [past, setPast] = useState(false); + const onTogglePast = () => setPast(!past); + + const [future, setFuture] = useState(false); + const onToggleFuture = () => setFuture(!future); + + const [upstream, setUpstream] = useState(false); + const onToggleUpstream = () => setUpstream(!upstream); + + const [downstream, setDownstream] = useState(false); + const onToggleDownstream = () => setDownstream(!downstream); + + const [recursive, setRecursive] = useState(true); + const onToggleRecursive = () => setRecursive(!recursive); + + const [failed, setFailed] = useState(false); + const onToggleFailed = () => setFailed(!failed); + + const mapIndexes = + mapIndex !== undefined && mapIndex !== -1 ? 
[mapIndex] : undefined; + + const { data: affectedTasks, isLoading: isLoadingDryRun } = + useClearTaskDryRun({ + dagId, + runId, + taskId, + executionDate, + isGroup: !!isGroup, + past, + future, + upstream, + downstream, + recursive, + failed, + mapIndexes, + }); + + const { mutateAsync: clearTask, isLoading } = useClearTask({ + dagId, + runId, + taskId, + executionDate, + isGroup: !!isGroup, + }); + + const resetModal = () => { + onClose(); + setDownstream(false); + setUpstream(false); + setPast(false); + setFuture(false); + setRecursive(false); + setFailed(false); + }; + + const onClear = () => { + clearTask({ + confirmed: true, + past, + future, + upstream, + downstream, + recursive, + failed, + mapIndexes, + }); + resetModal(); + }; + + const clearLabel = "Clear and retry task."; + + return ( + <> + + + + + Task: + + {taskId} + + + + Run: + + {runId} + + {isMapped && ( + + + Map Index: + + {mapIndex !== undefined ? mapIndex : `All mapped tasks`} + + )} + + } + affectedTasks={affectedTasks} + submitButton={ + + } + > + + Include: + + + + + + + + + + {isGroup && (past || future) && ( + + + Clearing a TaskGroup in the future and/or past will affect all the + tasks of this group across multiple dag runs. +
+ This can take a while to complete. +
+ )} +
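
The new ClearInstance modal above previews the affected task instances with a dry-run request before the user confirms. Per the airflow/www/views.py hunk later in this diff, that dry-run response is now a minimal JSON list of task_id, map_index and run_id rather than stringified task instances, which is what the ActionModal table and the new MinimalTaskInstance type consume. A rough sketch of that payload shape follows; FakeTI and to_minimal_payload are illustrative stand-ins, not names from this PR.

```python
# Illustrative sketch only: mirrors the JSON shape the updated /clear and
# /confirm handlers return for dry runs, which feeds the "Affected Tasks"
# table in the modal. FakeTI is a stand-in, not Airflow code.
from dataclasses import dataclass


@dataclass
class FakeTI:
    task_id: str
    map_index: int
    run_id: str


def to_minimal_payload(tis):
    # Same keys as the list comprehension in the views.py hunk below.
    return [
        {"task_id": ti.task_id, "map_index": ti.map_index, "run_id": ti.run_id}
        for ti in tis
    ]


print(to_minimal_payload([FakeTI("extract", -1, "manual__2023-04-01T00:00:00")]))
# [{'task_id': 'extract', 'map_index': -1, 'run_id': 'manual__2023-04-01T00:00:00'}]
```
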
+ + ); +}; + +export default ClearInstance; diff --git a/airflow/www/static/js/dag/details/taskInstance/taskActions/MarkFailed.tsx b/airflow/www/static/js/dag/details/taskInstance/taskActions/MarkFailed.tsx deleted file mode 100644 index 730cadc48d5dc..0000000000000 --- a/airflow/www/static/js/dag/details/taskInstance/taskActions/MarkFailed.tsx +++ /dev/null @@ -1,141 +0,0 @@ -/*! - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -import React, { useState } from "react"; -import { Button, Flex, ButtonGroup, useDisclosure } from "@chakra-ui/react"; - -import { useConfirmMarkTask, useMarkFailedTask } from "src/api"; -import ConfirmDialog from "src/components/ConfirmDialog"; -import { getMetaValue } from "src/utils"; - -import ActionButton from "./ActionButton"; - -const canEdit = getMetaValue("can_edit") === "True"; - -interface Props { - dagId: string; - runId: string; - taskId: string; - mapIndexes: number[]; -} - -const MarkFailed = ({ dagId, runId, taskId, mapIndexes }: Props) => { - const [affectedTasks, setAffectedTasks] = useState([]); - - // Options check/unchecked - const [past, setPast] = useState(false); - const onTogglePast = () => setPast(!past); - - const [future, setFuture] = useState(false); - const onToggleFuture = () => setFuture(!future); - - const [upstream, setUpstream] = useState(false); - const onToggleUpstream = () => setUpstream(!upstream); - - const [downstream, setDownstream] = useState(false); - const onToggleDownstream = () => setDownstream(!downstream); - - // Confirm dialog open/close - const { isOpen, onOpen, onClose } = useDisclosure(); - - const { mutateAsync: markFailedMutation, isLoading: isMarkLoading } = - useMarkFailedTask({ - dagId, - runId, - taskId, - }); - const { mutateAsync: confirmChangeMutation, isLoading: isConfirmLoading } = - useConfirmMarkTask({ - dagId, - runId, - taskId, - state: "failed", - }); - - const onClick = async () => { - const data = await confirmChangeMutation({ - past, - future, - upstream, - downstream, - mapIndexes, - }); - setAffectedTasks(data); - onOpen(); - }; - - const onConfirm = async () => { - await markFailedMutation({ - past, - future, - upstream, - downstream, - mapIndexes, - }); - setAffectedTasks([]); - onClose(); - }; - - const isLoading = isMarkLoading || isConfirmLoading; - - return ( - - - - - - - - - - - ); -}; - -export default MarkFailed; diff --git a/airflow/www/static/js/dag/details/taskInstance/taskActions/MarkInstanceAs.tsx b/airflow/www/static/js/dag/details/taskInstance/taskActions/MarkInstanceAs.tsx new file mode 100644 index 0000000000000..600aaeb64e602 --- /dev/null +++ b/airflow/www/static/js/dag/details/taskInstance/taskActions/MarkInstanceAs.tsx @@ -0,0 +1,267 @@ +/*! 
+ * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import React, { useState } from "react"; +import { + Flex, + Button, + Menu, + MenuButton, + MenuItem, + MenuList, + MenuButtonProps, + useDisclosure, + ButtonGroup, + Box, + Text, +} from "@chakra-ui/react"; +import { MdArrowDropDown } from "react-icons/md"; +import { capitalize } from "lodash"; + +import { getMetaValue } from "src/utils"; +import type { TaskState } from "src/types"; +import { + useMarkFailedTask, + useMarkSuccessTask, + useMarkTaskDryRun, +} from "src/api"; + +import { SimpleStatus } from "../../../StatusBox"; +import ActionButton from "./ActionButton"; +import ActionModal from "./ActionModal"; + +const canEdit = getMetaValue("can_edit") === "True"; +const dagId = getMetaValue("dag_id"); + +interface Props extends MenuButtonProps { + runId: string; + taskId: string; + state?: TaskState; + mapIndex?: number; + isMapped?: boolean; +} + +const MarkInstanceAs = ({ + runId, + taskId, + mapIndex, + isMapped, + state: currentState, + ...otherProps +}: Props) => { + const { onOpen, onClose, isOpen } = useDisclosure(); + + const [newState, setNewState] = useState<"failed" | "success">("success"); + + const [past, setPast] = useState(false); + const onTogglePast = () => setPast(!past); + + const [future, setFuture] = useState(false); + const onToggleFuture = () => setFuture(!future); + + const [upstream, setUpstream] = useState(false); + const onToggleUpstream = () => setUpstream(!upstream); + + const [downstream, setDownstream] = useState(false); + const onToggleDownstream = () => setDownstream(!downstream); + + const markAsFailed = () => { + setNewState("failed"); + onOpen(); + }; + + const markAsSuccess = () => { + setNewState("success"); + onOpen(); + }; + + const mapIndexes = + mapIndex !== undefined && mapIndex !== -1 ? 
[mapIndex] : undefined; + + const { data: affectedTasks, isLoading: isLoadingDryRun } = useMarkTaskDryRun( + { + dagId, + runId, + taskId, + state: newState, + past, + future, + upstream, + downstream, + mapIndexes, + } + ); + + const { mutateAsync: markFailedMutation, isLoading: isMarkFailedLoading } = + useMarkFailedTask({ + dagId, + runId, + taskId, + }); + + const { mutateAsync: markSuccessMutation, isLoading: isMarkSuccessLoading } = + useMarkSuccessTask({ + dagId, + runId, + taskId, + }); + + const resetModal = () => { + onClose(); + setDownstream(false); + setUpstream(false); + setPast(false); + setFuture(false); + }; + + const onMarkState = () => { + if (newState === "success") { + markSuccessMutation({ + past, + future, + upstream, + downstream, + mapIndexes, + }); + } else if (newState === "failed") { + markFailedMutation({ + past, + future, + upstream, + downstream, + mapIndexes, + }); + } + resetModal(); + }; + + const markLabel = "Manually set task instance state"; + const isMappedSummary = isMapped && mapIndex === undefined; + + return ( + <> + + + + Mark state as… + + + + + + + failed + + + + success + + + + + + + Task: + + {taskId} + + + + Run: + + {runId} + + {isMapped && ( + + + Map Index: + + {mapIndex !== undefined ? mapIndex : `All mapped tasks`} + + )} + + } + affectedTasks={affectedTasks} + submitButton={ + + } + > + + Include: + + + + + + + + + + ); +}; + +export default MarkInstanceAs; diff --git a/airflow/www/static/js/dag/details/taskInstance/taskActions/MarkSuccess.tsx b/airflow/www/static/js/dag/details/taskInstance/taskActions/MarkSuccess.tsx deleted file mode 100644 index 7b1b9af7baf4a..0000000000000 --- a/airflow/www/static/js/dag/details/taskInstance/taskActions/MarkSuccess.tsx +++ /dev/null @@ -1,141 +0,0 @@ -/*! - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -import React, { useState } from "react"; -import { Button, Flex, ButtonGroup, useDisclosure } from "@chakra-ui/react"; - -import ConfirmDialog from "src/components/ConfirmDialog"; -import { useMarkSuccessTask, useConfirmMarkTask } from "src/api"; -import { getMetaValue } from "src/utils"; - -import ActionButton from "./ActionButton"; - -const canEdit = getMetaValue("can_edit") === "True"; - -interface Props { - dagId: string; - runId: string; - taskId: string; - mapIndexes: number[]; -} - -const MarkSuccess = ({ dagId, runId, taskId, mapIndexes }: Props) => { - const [affectedTasks, setAffectedTasks] = useState([]); - - // Options check/unchecked - const [past, setPast] = useState(false); - const onTogglePast = () => setPast(!past); - - const [future, setFuture] = useState(false); - const onToggleFuture = () => setFuture(!future); - - const [upstream, setUpstream] = useState(false); - const onToggleUpstream = () => setUpstream(!upstream); - - const [downstream, setDownstream] = useState(false); - const onToggleDownstream = () => setDownstream(!downstream); - - // Confirm dialog open/close - const { isOpen, onOpen, onClose } = useDisclosure(); - - const { mutateAsync: markSuccessMutation, isLoading: isMarkLoading } = - useMarkSuccessTask({ - dagId, - runId, - taskId, - }); - const { mutateAsync: confirmChangeMutation, isLoading: isConfirmLoading } = - useConfirmMarkTask({ - dagId, - runId, - taskId, - state: "success", - }); - - const onClick = async () => { - const data = await confirmChangeMutation({ - past, - future, - upstream, - downstream, - mapIndexes, - }); - setAffectedTasks(data); - onOpen(); - }; - - const onConfirm = async () => { - await markSuccessMutation({ - past, - future, - upstream, - downstream, - mapIndexes, - }); - setAffectedTasks([]); - onClose(); - }; - - const isLoading = isMarkLoading || isConfirmLoading; - - return ( - - - - - - - - - - - ); -}; - -export default MarkSuccess; diff --git a/airflow/www/static/js/dag/details/taskInstance/taskActions/index.tsx b/airflow/www/static/js/dag/details/taskInstance/taskActions/index.tsx deleted file mode 100644 index 92e471bc26026..0000000000000 --- a/airflow/www/static/js/dag/details/taskInstance/taskActions/index.tsx +++ /dev/null @@ -1,84 +0,0 @@ -/*! - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -import React from "react"; -import { Box, VStack, Divider, StackDivider, Text } from "@chakra-ui/react"; - -import type { CommonActionProps } from "./types"; -import ClearAction from "./Clear"; -import MarkFailedAction from "./MarkFailed"; -import MarkSuccessAction from "./MarkSuccess"; - -type Props = { - title: string; -} & CommonActionProps; - -const TaskActions = ({ - title, - runId, - taskId, - dagId, - executionDate, - mapIndexes, - isGroup, -}: Props) => ( - - - {title} - - - {/* For now only ClearAction is supported for groups */} - {isGroup ? ( - - ) : ( - }> - - - - - )} - - -); - -export default TaskActions; diff --git a/airflow/www/static/js/dag/details/taskInstance/taskActions/types.ts b/airflow/www/static/js/dag/details/taskInstance/taskActions/types.ts deleted file mode 100644 index 2ba75d1ed8206..0000000000000 --- a/airflow/www/static/js/dag/details/taskInstance/taskActions/types.ts +++ /dev/null @@ -1,29 +0,0 @@ -/*! - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -import type { Dag, DagRun, TaskInstance } from "src/types"; - -export interface CommonActionProps { - runId: DagRun["runId"]; - taskId: TaskInstance["taskId"]; - dagId: Dag["id"]; - executionDate: DagRun["executionDate"]; - mapIndexes: number[]; - isGroup?: boolean; -} diff --git a/airflow/www/static/js/types/index.ts b/airflow/www/static/js/types/index.ts index 94e0e60526e18..8f4eb7d450db1 100644 --- a/airflow/www/static/js/types/index.ts +++ b/airflow/www/static/js/types/index.ts @@ -130,16 +130,19 @@ interface DatasetListItem extends API.Dataset { totalUpdates: number; } +type MinimalTaskInstance = Pick; + export type { + API, + MinimalTaskInstance, Dag, DagRun, - RunState, - TaskState, - TaskInstance, - Task, - DepNode, + DatasetListItem, DepEdge, - API, + DepNode, RunOrdering, - DatasetListItem, + RunState, + Task, + TaskInstance, + TaskState, }; diff --git a/airflow/www/views.py b/airflow/www/views.py index cabc8e9e1a35b..a0e9ce33d2708 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -1050,7 +1050,6 @@ def task_stats(self, session: Session = NEW_SESSION): ) if conf.getboolean("webserver", "SHOW_RECENT_STATS_FOR_COMPLETED_RUNS", fallback=True): - last_dag_run = ( session.query(DagRun.dag_id, sqla.func.max(DagRun.execution_date).label("execution_date")) .join(DagModel, DagModel.dag_id == DagRun.dag_id) @@ -2122,7 +2121,12 @@ def _clear_dag_tis( if not details: return redirect_or_json(origin, "No task instances to clear", status="error", status_code=404) elif request.headers.get("Accept") == "application/json": - return htmlsafe_json_dumps(details, separators=(",", ":")) + if confirmed: + return htmlsafe_json_dumps(details, separators=(",", ":")) + return htmlsafe_json_dumps( + [{"task_id": ti.task_id, "map_index": ti.map_index, 
"run_id": ti.run_id} for ti in tis], + separators=(",", ":"), + ) return self.render_template( "airflow/confirm.html", endpoint=None, @@ -2528,8 +2532,13 @@ def confirm(self): ) if request.headers.get("Accept") == "application/json": - details = [str(t) for t in to_be_altered] - return htmlsafe_json_dumps(details, separators=(",", ":")) + return htmlsafe_json_dumps( + [ + {"task_id": ti.task_id, "map_index": ti.map_index, "run_id": ti.run_id} + for ti in to_be_altered + ], + separators=(",", ":"), + ) details = "\n".join(str(t) for t in to_be_altered) @@ -4475,7 +4484,6 @@ def action_mulduplicate(self, connections, session: Session = NEW_SESSION): "warning", ) else: - dup_conn = Connection( new_conn_id, selected_conn.conn_type, @@ -5683,7 +5691,6 @@ def list(self): ) def _calculate_graph(self): - nodes_dict: dict[str, Any] = {} edge_tuples: set[dict[str, str]] = set() diff --git a/airflow/www/yarn.lock b/airflow/www/yarn.lock index c5f1a4d24f0f9..7578b9e2bb25a 100644 --- a/airflow/www/yarn.lock +++ b/airflow/www/yarn.lock @@ -4532,7 +4532,7 @@ commander@2, commander@^2.20.0: resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33" integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ== -commander@7, commander@^7.0.0, commander@^7.2.0: +commander@^7.0.0, commander@^7.2.0: version "7.2.0" resolved "https://registry.yarnpkg.com/commander/-/commander-7.2.0.tgz#a36cb57d0b501ce108e4d20559a150a391d97ab7" integrity sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw== @@ -4824,42 +4824,6 @@ csstype@^3.0.2: resolved "https://registry.yarnpkg.com/csstype/-/csstype-3.0.8.tgz#d2266a792729fb227cd216fb572f43728e1ad340" integrity sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw== -"d3-7@npm:d3@7": - version "7.8.0" - resolved "https://registry.yarnpkg.com/d3/-/d3-7.8.0.tgz#c9441f0ea9266b1003a97c2ffd53e79e9e14b1fc" - integrity sha512-a5rNemRadWkEfqnY5NsD4RdCP9vn8EIJ4I5Rl14U0uKH1SXqcNmk/h9aGaAF1O98lz6L9M0IeUcuPa9GUYbI5A== - dependencies: - d3-array "3" - d3-axis "3" - d3-brush "3" - d3-chord "3" - d3-color "3" - d3-contour "4" - d3-delaunay "6" - d3-dispatch "3" - d3-drag "3" - d3-dsv "3" - d3-ease "3" - d3-fetch "3" - d3-force "3" - d3-format "3" - d3-geo "3" - d3-hierarchy "3" - d3-interpolate "3" - d3-path "3" - d3-polygon "3" - d3-quadtree "3" - d3-random "3" - d3-scale "4" - d3-scale-chromatic "3" - d3-selection "3" - d3-shape "3" - d3-time "3" - d3-time-format "4" - d3-timer "3" - d3-transition "3" - d3-zoom "3" - d3-array@1, d3-array@^1.1.1, d3-array@^1.2.0: version "1.2.4" resolved "https://registry.yarnpkg.com/d3-array/-/d3-array-1.2.4.tgz#635ce4d5eea759f6f605863dbcfc30edc737f71f" @@ -4872,23 +4836,11 @@ d3-array@2, d3-array@^2.3.0: dependencies: internmap "^1.0.0" -"d3-array@2 - 3", "d3-array@2.10.0 - 3", "d3-array@2.5.0 - 3", d3-array@3, d3-array@^3.2.0: - version "3.2.1" - resolved "https://registry.yarnpkg.com/d3-array/-/d3-array-3.2.1.tgz#39331ea706f5709417d31bbb6ec152e0328b39b3" - integrity sha512-gUY/qeHq/yNqqoCKNq4vtpFLdoCdvyNpWoC/KNjhGbhDuQpAM9sIQQKkXSNpXa9h5KySs/gzm7R88WkUutgwWQ== - dependencies: - internmap "1 - 2" - d3-axis@1: version "1.0.12" resolved "https://registry.yarnpkg.com/d3-axis/-/d3-axis-1.0.12.tgz#cdf20ba210cfbb43795af33756886fb3638daac9" integrity sha512-ejINPfPSNdGFKEOAtnBtdkpr24c4d4jsei6Lg98mxf424ivoDP2956/5HDpIAtmHo85lqT4pruy+zEgvRUBqaQ== -d3-axis@3: - version "3.0.0" - resolved 
"https://registry.yarnpkg.com/d3-axis/-/d3-axis-3.0.0.tgz#c42a4a13e8131d637b745fc2973824cfeaf93322" - integrity sha512-IH5tgjV4jE/GhHkRV0HiVYPDtvfjHQlQfJHs0usq7M30XcSBvOotpmH1IgkcXsO/5gEQZD43B//fc7SRT5S+xw== - d3-brush@1: version "1.1.6" resolved "https://registry.yarnpkg.com/d3-brush/-/d3-brush-1.1.6.tgz#b0a22c7372cabec128bdddf9bddc058592f89e9b" @@ -4900,17 +4852,6 @@ d3-brush@1: d3-selection "1" d3-transition "1" -d3-brush@3: - version "3.0.0" - resolved "https://registry.yarnpkg.com/d3-brush/-/d3-brush-3.0.0.tgz#6f767c4ed8dcb79de7ede3e1c0f89e63ef64d31c" - integrity sha512-ALnjWlVYkXsVIGlOsuWH1+3udkYFI48Ljihfnh8FZPF2QS9o+PzGLBslO0PjzVoHLZ2KCVgAM8NVkXPJB2aNnQ== - dependencies: - d3-dispatch "1 - 3" - d3-drag "2 - 3" - d3-interpolate "1 - 3" - d3-selection "3" - d3-transition "3" - d3-chord@1: version "1.0.6" resolved "https://registry.yarnpkg.com/d3-chord/-/d3-chord-1.0.6.tgz#309157e3f2db2c752f0280fedd35f2067ccbb15f" @@ -4919,19 +4860,12 @@ d3-chord@1: d3-array "1" d3-path "1" -d3-chord@3: - version "3.0.1" - resolved "https://registry.yarnpkg.com/d3-chord/-/d3-chord-3.0.1.tgz#d156d61f485fce8327e6abf339cb41d8cbba6966" - integrity sha512-VE5S6TNa+j8msksl7HwjxMHDM2yNK3XCkusIlpX5kwauBfXuyLAtNg9jCp/iHH61tgI4sb6R/EIMWCqEIdjT/g== - dependencies: - d3-path "1 - 3" - d3-collection@1, d3-collection@^1.0.4: version "1.0.7" resolved "https://registry.yarnpkg.com/d3-collection/-/d3-collection-1.0.7.tgz#349bd2aa9977db071091c13144d5e4f16b5b310e" integrity sha512-ii0/r5f4sjKNTfh84Di+DpztYwqKhEyUlKoPrzUFfeSkWxjW49xU2QzO9qrPrNkpdI0XJkfzvmTu8V2Zylln6A== -d3-color@1, "d3-color@1 - 2", "d3-color@1 - 3", d3-color@3, d3-color@^3.1.0: +d3-color@1, "d3-color@1 - 2", "d3-color@1 - 3", d3-color@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/d3-color/-/d3-color-3.1.0.tgz#395b2833dfac71507f12ac2f7af23bf819de24e2" integrity sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA== @@ -4943,26 +4877,12 @@ d3-contour@1: dependencies: d3-array "^1.1.1" -d3-contour@4: - version "4.0.0" - resolved "https://registry.yarnpkg.com/d3-contour/-/d3-contour-4.0.0.tgz#5a1337c6da0d528479acdb5db54bc81a0ff2ec6b" - integrity sha512-7aQo0QHUTu/Ko3cP9YK9yUTxtoDEiDGwnBHyLxG5M4vqlBkO/uixMRele3nfsfj6UXOcuReVpVXzAboGraYIJw== - dependencies: - d3-array "^3.2.0" - -d3-delaunay@6: - version "6.0.2" - resolved "https://registry.yarnpkg.com/d3-delaunay/-/d3-delaunay-6.0.2.tgz#7fd3717ad0eade2fc9939f4260acfb503f984e92" - integrity sha512-IMLNldruDQScrcfT+MWnazhHbDJhcRJyOEBAJfwQnHle1RPh6WDuLvxNArUju2VSMSUuKlY5BGHRJ2cYyoFLQQ== - dependencies: - delaunator "5" - d3-dispatch@1: version "1.0.6" resolved "https://registry.yarnpkg.com/d3-dispatch/-/d3-dispatch-1.0.6.tgz#00d37bcee4dd8cd97729dd893a0ac29caaba5d58" integrity sha512-fVjoElzjhCEy+Hbn8KygnmMS7Or0a9sI2UzGwoB7cCtvI1XpVN9GpoYlnb3xt2YV66oXYb1fLJ8GMvP4hdU1RA== -"d3-dispatch@1 - 3", d3-dispatch@3: +"d3-dispatch@1 - 3": version "3.0.1" resolved "https://registry.yarnpkg.com/d3-dispatch/-/d3-dispatch-3.0.1.tgz#5fc75284e9c2375c36c839411a0cf550cbfc4d5e" integrity sha512-rzUyPU/S7rwUflMyLc1ETDeBj0NRuHKKAcvukozwhshr6g6c5d8zh4c2gQjY2bZ0dXeGLWc1PF174P2tVvKhfg== @@ -4975,7 +4895,7 @@ d3-drag@1: d3-dispatch "1" d3-selection "1" -"d3-drag@2 - 3", d3-drag@3, d3-drag@^3.0.0: +"d3-drag@2 - 3", d3-drag@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/d3-drag/-/d3-drag-3.0.0.tgz#994aae9cd23c719f53b5e10e3a0a6108c69607ba" integrity sha512-pWbUJLdETVA8lQNJecMxoXfH6x+mO2UQo8rSmZ+QqxcbyA3hfeprFgIT//HW2nlHChWeIIMwS2Fq+gEARkhTkg== @@ -4992,21 +4912,12 @@ 
d3-dsv@1: iconv-lite "0.4" rw "1" -"d3-dsv@1 - 3", d3-dsv@3: - version "3.0.1" - resolved "https://registry.yarnpkg.com/d3-dsv/-/d3-dsv-3.0.1.tgz#c63af978f4d6a0d084a52a673922be2160789b73" - integrity sha512-UG6OvdI5afDIFP9w4G0mNq50dSOsXHJaRE8arAS5o9ApWnIElp8GZw1Dun8vP8OyHOZ/QJUKUJwxiiCCnUwm+Q== - dependencies: - commander "7" - iconv-lite "0.6" - rw "1" - d3-ease@1: version "1.0.7" resolved "https://registry.yarnpkg.com/d3-ease/-/d3-ease-1.0.7.tgz#9a834890ef8b8ae8c558b2fe55bd57f5993b85e2" integrity sha512-lx14ZPYkhNx0s/2HX5sLFUI3mbasHjSSpwO/KaaNACweVwxUruKyWVcb293wMv1RqTPZyZ8kSZ2NogUZNcLOFQ== -"d3-ease@1 - 3", d3-ease@3: +"d3-ease@1 - 3": version "3.0.1" resolved "https://registry.yarnpkg.com/d3-ease/-/d3-ease-3.0.1.tgz#9658ac38a2140d59d346160f1f6c30fda0bd12f4" integrity sha512-wR/XK3D3XcLIZwpbvQwQ5fK+8Ykds1ip7A2Txe0yxncXSdq1L9skcG7blcedkOX+ZcgxGAmLX1FrRGbADwzi0w== @@ -5018,13 +4929,6 @@ d3-fetch@1: dependencies: d3-dsv "1" -d3-fetch@3: - version "3.0.1" - resolved "https://registry.yarnpkg.com/d3-fetch/-/d3-fetch-3.0.1.tgz#83141bff9856a0edb5e38de89cdcfe63d0a60a22" - integrity sha512-kpkQIM20n3oLVBKGg6oHrUchHM3xODkTzjMoj7aWQFq5QEM+R6E4WkzT5+tojDY7yjez8KgCBRoj4aEr99Fdqw== - dependencies: - d3-dsv "1 - 3" - d3-force@1: version "1.2.1" resolved "https://registry.yarnpkg.com/d3-force/-/d3-force-1.2.1.tgz#fd29a5d1ff181c9e7f0669e4bd72bdb0e914ec0b" @@ -5035,15 +4939,6 @@ d3-force@1: d3-quadtree "1" d3-timer "1" -d3-force@3: - version "3.0.0" - resolved "https://registry.yarnpkg.com/d3-force/-/d3-force-3.0.0.tgz#3e2ba1a61e70888fe3d9194e30d6d14eece155c4" - integrity sha512-zxV/SsA+U4yte8051P4ECydjD/S+qeYtnaIyAs9tgHCqfguma/aAQDjo85A9Z6EKhBirHRJHXIgJUlffT4wdLg== - dependencies: - d3-dispatch "1 - 3" - d3-quadtree "1 - 3" - d3-timer "1 - 3" - d3-format@1: version "1.4.5" resolved "https://registry.yarnpkg.com/d3-format/-/d3-format-1.4.5.tgz#374f2ba1320e3717eb74a9356c67daee17a7edb4" @@ -5054,11 +4949,6 @@ d3-format@1: resolved "https://registry.yarnpkg.com/d3-format/-/d3-format-2.0.0.tgz#a10bcc0f986c372b729ba447382413aabf5b0767" integrity sha512-Ab3S6XuE/Q+flY96HXT0jOXcM4EAClYFnRGY5zsjRGNy6qCYrQsMffs7cV5Q9xejb35zxW5hf/guKw34kvIKsA== -"d3-format@1 - 3", d3-format@3: - version "3.1.0" - resolved "https://registry.yarnpkg.com/d3-format/-/d3-format-3.1.0.tgz#9260e23a28ea5cb109e93b21a06e24e2ebd55641" - integrity sha512-YyUI6AEuY/Wpt8KWLgZHsIU86atmikuoOmCfommt0LYHiQSPjvX2AcFc38PX0CBpr2RCyZhjex+NS/LPOv6YqA== - d3-geo@1: version "1.12.1" resolved "https://registry.yarnpkg.com/d3-geo/-/d3-geo-1.12.1.tgz#7fc2ab7414b72e59fbcbd603e80d9adc029b035f" @@ -5066,23 +4956,11 @@ d3-geo@1: dependencies: d3-array "1" -d3-geo@3: - version "3.1.0" - resolved "https://registry.yarnpkg.com/d3-geo/-/d3-geo-3.1.0.tgz#74fd54e1f4cebd5185ac2039217a98d39b0a4c0e" - integrity sha512-JEo5HxXDdDYXCaWdwLRt79y7giK8SbhZJbFWXqbRTolCHFI5jRqteLzCsq51NKbUoX0PjBVSohxrx+NoOUujYA== - dependencies: - d3-array "2.5.0 - 3" - d3-hierarchy@1: version "1.1.9" resolved "https://registry.yarnpkg.com/d3-hierarchy/-/d3-hierarchy-1.1.9.tgz#2f6bee24caaea43f8dc37545fa01628559647a83" integrity sha512-j8tPxlqh1srJHAtxfvOUwKNYJkQuBFdM1+JAUfq6xqH5eAqf93L7oG1NVqDa4CpFZNvnNKtCYEUC8KY9yEn9lQ== -d3-hierarchy@3: - version "3.1.2" - resolved "https://registry.yarnpkg.com/d3-hierarchy/-/d3-hierarchy-3.1.2.tgz#b01cd42c1eed3d46db77a5966cf726f8c09160c6" - integrity sha512-FX/9frcub54beBdugHjDCdikxThEqjnR93Qt7PvQTOHxyiNCAlvMrHhclk3cD5VeAaq9fxmfRp+CnWw9rEMBuA== - d3-interpolate@1, d3-interpolate@^1.4.0: version "1.4.0" resolved 
"https://registry.yarnpkg.com/d3-interpolate/-/d3-interpolate-1.4.0.tgz#526e79e2d80daa383f9e0c1c1c7dcc0f0583e987" @@ -5090,7 +4968,7 @@ d3-interpolate@1, d3-interpolate@^1.4.0: dependencies: d3-color "1" -"d3-interpolate@1 - 3", "d3-interpolate@1.2.0 - 3", d3-interpolate@3: +"d3-interpolate@1 - 3": version "3.0.1" resolved "https://registry.yarnpkg.com/d3-interpolate/-/d3-interpolate-3.0.1.tgz#3c47aa5b32c5b3dfb56ef3fd4342078a632b400d" integrity sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g== @@ -5114,41 +4992,21 @@ d3-path@1, d3-path@^1.0.5: resolved "https://registry.yarnpkg.com/d3-path/-/d3-path-2.0.0.tgz#55d86ac131a0548adae241eebfb56b4582dd09d8" integrity sha512-ZwZQxKhBnv9yHaiWd6ZU4x5BtCQ7pXszEV9CU6kRgwIQVQGLMv1oiL4M+MK/n79sYzsj+gcgpPQSctJUsLN7fA== -"d3-path@1 - 3", d3-path@3, d3-path@^3.1.0: - version "3.1.0" - resolved "https://registry.yarnpkg.com/d3-path/-/d3-path-3.1.0.tgz#22df939032fb5a71ae8b1800d61ddb7851c42526" - integrity sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ== - d3-polygon@1: version "1.0.6" resolved "https://registry.yarnpkg.com/d3-polygon/-/d3-polygon-1.0.6.tgz#0bf8cb8180a6dc107f518ddf7975e12abbfbd38e" integrity sha512-k+RF7WvI08PC8reEoXa/w2nSg5AUMTi+peBD9cmFc+0ixHfbs4QmxxkarVal1IkVkgxVuk9JSHhJURHiyHKAuQ== -d3-polygon@3: - version "3.0.1" - resolved "https://registry.yarnpkg.com/d3-polygon/-/d3-polygon-3.0.1.tgz#0b45d3dd1c48a29c8e057e6135693ec80bf16398" - integrity sha512-3vbA7vXYwfe1SYhED++fPUQlWSYTTGmFmQiany/gdbiWgU/iEyQzyymwL9SkJjFFuCS4902BSzewVGsHHmHtXg== - d3-quadtree@1: version "1.0.7" resolved "https://registry.yarnpkg.com/d3-quadtree/-/d3-quadtree-1.0.7.tgz#ca8b84df7bb53763fe3c2f24bd435137f4e53135" integrity sha512-RKPAeXnkC59IDGD0Wu5mANy0Q2V28L+fNe65pOCXVdVuTJS3WPKaJlFHer32Rbh9gIo9qMuJXio8ra4+YmIymA== -"d3-quadtree@1 - 3", d3-quadtree@3: - version "3.0.1" - resolved "https://registry.yarnpkg.com/d3-quadtree/-/d3-quadtree-3.0.1.tgz#6dca3e8be2b393c9a9d514dabbd80a92deef1a4f" - integrity sha512-04xDrxQTDTCFwP5H6hRhsRcb9xxv2RzkcsygFzmkSIOJy3PeRJP7sNk3VRIbKXcog561P9oU0/rVH6vDROAgUw== - d3-random@1: version "1.1.2" resolved "https://registry.yarnpkg.com/d3-random/-/d3-random-1.1.2.tgz#2833be7c124360bf9e2d3fd4f33847cfe6cab291" integrity sha512-6AK5BNpIFqP+cx/sreKzNjWbwZQCSUatxq+pPRmFIQaWuoD+NrbVWw7YWpHiXpCQ/NanKdtGDuB+VQcZDaEmYQ== -d3-random@3: - version "3.0.1" - resolved "https://registry.yarnpkg.com/d3-random/-/d3-random-3.0.1.tgz#d4926378d333d9c0bfd1e6fa0194d30aebaa20f4" - integrity sha512-FXMe9GfxTxqd5D6jFsQ+DJ8BJS4E/fT5mqqdjovykEB2oFbTMDVdg1MGFxfQW+FBOGoB++k8swBrgwSHT1cUXQ== - d3-scale-chromatic@1: version "1.5.0" resolved "https://registry.yarnpkg.com/d3-scale-chromatic/-/d3-scale-chromatic-1.5.0.tgz#54e333fc78212f439b14641fb55801dd81135a98" @@ -5157,14 +5015,6 @@ d3-scale-chromatic@1: d3-color "1" d3-interpolate "1" -d3-scale-chromatic@3: - version "3.0.0" - resolved "https://registry.yarnpkg.com/d3-scale-chromatic/-/d3-scale-chromatic-3.0.0.tgz#15b4ceb8ca2bb0dcb6d1a641ee03d59c3b62376a" - integrity sha512-Lx9thtxAKrO2Pq6OO2Ua474opeziKr279P/TKZsMAhYyNDD3EnCffdbgeSYN5O7m2ByQsxtuP2CSDczNUIZ22g== - dependencies: - d3-color "1 - 3" - d3-interpolate "1 - 3" - d3-scale@2: version "2.2.2" resolved "https://registry.yarnpkg.com/d3-scale/-/d3-scale-2.2.2.tgz#4e880e0b2745acaaddd3ede26a9e908a9e17b81f" @@ -5177,17 +5027,6 @@ d3-scale@2: d3-time "1" d3-time-format "2" -d3-scale@4: - version "4.0.2" - resolved 
"https://registry.yarnpkg.com/d3-scale/-/d3-scale-4.0.2.tgz#82b38e8e8ff7080764f8dcec77bd4be393689396" - integrity sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ== - dependencies: - d3-array "2.10.0 - 3" - d3-format "1 - 3" - d3-interpolate "1.2.0 - 3" - d3-time "2.1.1 - 3" - d3-time-format "2 - 4" - d3-scale@^3.3.0: version "3.3.0" resolved "https://registry.yarnpkg.com/d3-scale/-/d3-scale-3.3.0.tgz#28c600b29f47e5b9cd2df9749c206727966203f3" @@ -5216,13 +5055,6 @@ d3-shape@1, d3-shape@^1.0.6, d3-shape@^1.2.0: dependencies: d3-path "1" -d3-shape@3: - version "3.2.0" - resolved "https://registry.yarnpkg.com/d3-shape/-/d3-shape-3.2.0.tgz#a1a839cbd9ba45f28674c69d7f855bcf91dfc6a5" - integrity sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA== - dependencies: - d3-path "^3.1.0" - d3-shape@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/d3-shape/-/d3-shape-2.1.0.tgz#3b6a82ccafbc45de55b57fcf956c584ded3b666f" @@ -5244,13 +5076,6 @@ d3-time-format@2: dependencies: d3-time "1 - 2" -"d3-time-format@2 - 4", d3-time-format@4: - version "4.1.0" - resolved "https://registry.yarnpkg.com/d3-time-format/-/d3-time-format-4.1.0.tgz#7ab5257a5041d11ecb4fe70a5c7d16a195bb408a" - integrity sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg== - dependencies: - d3-time "1 - 3" - d3-time@1: version "1.1.0" resolved "https://registry.yarnpkg.com/d3-time/-/d3-time-1.1.0.tgz#b1e19d307dae9c900b7e5b25ffc5dcc249a8a0f1" @@ -5263,19 +5088,12 @@ d3-time@1: dependencies: d3-array "2" -"d3-time@1 - 3", "d3-time@2.1.1 - 3", d3-time@3: - version "3.1.0" - resolved "https://registry.yarnpkg.com/d3-time/-/d3-time-3.1.0.tgz#9310db56e992e3c0175e1ef385e545e48a9bb5c7" - integrity sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q== - dependencies: - d3-array "2 - 3" - d3-timer@1: version "1.0.10" resolved "https://registry.yarnpkg.com/d3-timer/-/d3-timer-1.0.10.tgz#dfe76b8a91748831b13b6d9c793ffbd508dd9de5" integrity sha512-B1JDm0XDaQC+uvo4DT79H0XmBskgS3l6Ve+1SBCfxgmtIb1AVrPIoqd+nPSv+loMX8szQ0sVUhGngL7D5QPiXw== -"d3-timer@1 - 3", d3-timer@3: +"d3-timer@1 - 3": version "3.0.1" resolved "https://registry.yarnpkg.com/d3-timer/-/d3-timer-3.0.1.tgz#6284d2a2708285b1abb7e201eda4380af35e63b0" integrity sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA== @@ -5300,7 +5118,7 @@ d3-transition@1: d3-selection "^1.1.0" d3-timer "1" -"d3-transition@2 - 3", d3-transition@3: +"d3-transition@2 - 3": version "3.0.1" resolved "https://registry.yarnpkg.com/d3-transition/-/d3-transition-3.0.1.tgz#6869fdde1448868077fdd5989200cb61b2a1645f" integrity sha512-ApKvfjsSR6tg06xrL434C0WydLr7JewBB3V+/39RMHsaXTOG0zmt/OAXeng5M5LBm0ojmxJrpomQVZ1aPvBL4w== @@ -5327,7 +5145,7 @@ d3-zoom@1: d3-selection "1" d3-transition "1" -d3-zoom@3, d3-zoom@^3.0.0: +d3-zoom@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/d3-zoom/-/d3-zoom-3.0.0.tgz#d13f4165c73217ffeaa54295cd6969b3e7aee8f3" integrity sha512-b8AmV3kfQaqWAuacbPuNbL6vahnOJflOhexLzMMNLga62+/nh0JzvJ0aO/5a5MVgUFGS7Hu1P9P03o3fJkDCyw== @@ -5538,13 +5356,6 @@ del@^4.1.1: pify "^4.0.1" rimraf "^2.6.3" -delaunator@5: - version "5.0.0" - resolved "https://registry.yarnpkg.com/delaunator/-/delaunator-5.0.0.tgz#60f052b28bd91c9b4566850ebf7756efe821d81b" - integrity sha512-AyLvtyJdbv/U1GkiS6gUUzclRoAY4Gs75qkMygJJhU75LW4DNuSF2RMzpxs9jw9Oz1BobHjTdkG3zdP55VxAqw== - dependencies: - 
robust-predicates "^3.0.0" - delayed-stream@~1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619" @@ -6929,13 +6740,6 @@ iconv-lite@0.4, iconv-lite@0.4.24, iconv-lite@^0.4.4: dependencies: safer-buffer ">= 2.1.2 < 3" -iconv-lite@0.6: - version "0.6.3" - resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.6.3.tgz#a52f80bf38da1952eb5c681790719871a1a72501" - integrity sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw== - dependencies: - safer-buffer ">= 2.1.2 < 3.0.0" - icss-utils@^5.0.0, icss-utils@^5.1.0: version "5.1.0" resolved "https://registry.yarnpkg.com/icss-utils/-/icss-utils-5.1.0.tgz#c6be6858abd013d768e98366ae47e25d5887b1ae" @@ -7038,11 +6842,6 @@ internal-slot@^1.0.5: has "^1.0.3" side-channel "^1.0.4" -"internmap@1 - 2": - version "2.0.3" - resolved "https://registry.yarnpkg.com/internmap/-/internmap-2.0.3.tgz#6685f23755e43c524e251d29cbc97248e3061009" - integrity sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg== - internmap@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/internmap/-/internmap-1.0.1.tgz#0017cc8a3b99605f0302f2b198d272e015e5df95" @@ -10445,11 +10244,6 @@ rimraf@^2.6.3: dependencies: glob "^7.1.3" -robust-predicates@^3.0.0: - version "3.0.1" - resolved "https://registry.yarnpkg.com/robust-predicates/-/robust-predicates-3.0.1.tgz#ecde075044f7f30118682bd9fb3f123109577f9a" - integrity sha512-ndEIpszUHiG4HtDsQLeIuMvRsDnn8c8rYStabochtUeCvfuvNptb5TUbVD68LRAILPX7p9nqQGh4xJgn3EHS/g== - run-parallel@^1.1.9: version "1.2.0" resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee" @@ -10488,7 +10282,7 @@ safe-regex-test@^1.0.0: get-intrinsic "^1.1.3" is-regex "^1.1.4" -"safer-buffer@>= 2.1.2 < 3", "safer-buffer@>= 2.1.2 < 3.0.0": +"safer-buffer@>= 2.1.2 < 3": version "2.1.2" resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 856c36a1d389b..a3a9081d785f4 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -19,7 +19,7 @@ --- apiVersion: v2 name: airflow -version: 1.9.0-dev +version: 1.10.0-dev appVersion: 2.5.3 description: The official Helm chart to deploy Apache Airflow, a platform to programmatically author, schedule, and monitor workflows diff --git a/chart/templates/dag-processor/dag-processor-deployment.yaml b/chart/templates/dag-processor/dag-processor-deployment.yaml index 964f239e8f4f4..f2ff88d8a6468 100644 --- a/chart/templates/dag-processor/dag-processor-deployment.yaml +++ b/chart/templates/dag-processor/dag-processor-deployment.yaml @@ -173,6 +173,35 @@ spec: {{- if and (.Values.dags.gitSync.enabled) (not .Values.dags.persistence.enabled) }} {{- include "git_sync_container" . | indent 8 }} {{- end }} + {{- if .Values.dagProcessor.logGroomerSidecar.enabled }} + - name: dag-processor-log-groomer + resources: {{- toYaml .Values.dagProcessor.logGroomerSidecar.resources | nindent 12 }} + image: {{ template "airflow_image" . }} + imagePullPolicy: {{ .Values.images.airflow.pullPolicy }} + {{- if .Values.dagProcessor.logGroomerSidecar.command }} + command: {{ tpl (toYaml .Values.dagProcessor.logGroomerSidecar.command) . 
| nindent 12 }} + {{- end }} + {{- if .Values.dagProcessor.logGroomerSidecar.args }} + args: {{- tpl (toYaml .Values.dagProcessor.logGroomerSidecar.args) . | nindent 12 }} + {{- end }} + {{- if .Values.dagProcessor.logGroomerSidecar.retentionDays }} + env: + - name: AIRFLOW__LOG_RETENTION_DAYS + value: "{{ .Values.dagProcessor.logGroomerSidecar.retentionDays }}" + {{- end }} + volumeMounts: + - name: logs + mountPath: {{ template "airflow_logs" . }} + {{- if .Values.volumeMounts }} + {{- toYaml .Values.volumeMounts | nindent 12 }} + {{- end }} + {{- if .Values.dagProcessor.extraVolumeMounts }} + {{- tpl (toYaml .Values.dagProcessor.extraVolumeMounts) . | nindent 12 }} + {{- end }} + {{- if or .Values.webserver.webserverConfig .Values.webserver.webserverConfigConfigMapName }} + {{- include "airflow_webserver_config_mount" . | nindent 12 }} + {{- end }} + {{- end }} {{- if .Values.dagProcessor.extraContainers }} {{- toYaml .Values.dagProcessor.extraContainers | nindent 8 }} {{- end }} diff --git a/chart/templates/webserver/webserver-deployment.yaml b/chart/templates/webserver/webserver-deployment.yaml index 9fd5b4326ec75..03b8b456b4606 100644 --- a/chart/templates/webserver/webserver-deployment.yaml +++ b/chart/templates/webserver/webserver-deployment.yaml @@ -203,10 +203,10 @@ spec: value: {{ regexReplaceAll ":\\d+$" (urlParse (tpl .Values.config.webserver.base_url .)).host "" }} {{- end }} scheme: {{ .Values.webserver.livenessProbe.scheme | default "http" }} - initialDelaySeconds: {{ .Values.webserver.livenessProbe.initialDelaySeconds | default 15 }} - timeoutSeconds: {{ .Values.webserver.livenessProbe.timeoutSeconds | default 30 }} - failureThreshold: {{ .Values.webserver.livenessProbe.failureThreshold | default 20 }} - periodSeconds: {{ .Values.webserver.livenessProbe.periodSeconds | default 5 }} + initialDelaySeconds: {{ .Values.webserver.livenessProbe.initialDelaySeconds }} + timeoutSeconds: {{ .Values.webserver.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.webserver.livenessProbe.failureThreshold }} + periodSeconds: {{ .Values.webserver.livenessProbe.periodSeconds }} readinessProbe: httpGet: path: {{ if .Values.config.webserver.base_url }}{{- with urlParse (tpl .Values.config.webserver.base_url .) }}{{ .path }}{{ end }}{{ end }}/health @@ -217,10 +217,10 @@ spec: value: {{ regexReplaceAll ":\\d+$" (urlParse (tpl .Values.config.webserver.base_url .)).host "" }} {{- end }} scheme: {{ .Values.webserver.readinessProbe.scheme | default "http" }} - initialDelaySeconds: {{ .Values.webserver.readinessProbe.initialDelaySeconds | default 15 }} - timeoutSeconds: {{ .Values.webserver.readinessProbe.timeoutSeconds | default 30 }} - failureThreshold: {{ .Values.webserver.readinessProbe.failureThreshold | default 20 }} - periodSeconds: {{ .Values.webserver.readinessProbe.periodSeconds | default 5 }} + initialDelaySeconds: {{ .Values.webserver.readinessProbe.initialDelaySeconds }} + timeoutSeconds: {{ .Values.webserver.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.webserver.readinessProbe.failureThreshold }} + periodSeconds: {{ .Values.webserver.readinessProbe.periodSeconds }} envFrom: {{- include "custom_airflow_environment_from" . | default "\n []" | indent 10 }} env: {{- include "custom_airflow_environment" . 
| indent 10 }} diff --git a/chart/values.schema.json b/chart/values.schema.json index 1e1ce381fd4e1..d495c92a15627 100644 --- a/chart/values.schema.json +++ b/chart/values.schema.json @@ -2697,6 +2697,10 @@ } ] }, + "logGroomerSidecar": { + "$ref": "#/definitions/logGroomerConfigType", + "description": "Configuration for log groomer sidecar" + }, "waitForMigrations": { "description": "wait-for-airflow-migrations init container.", "type": "object", @@ -3211,17 +3215,17 @@ "timeoutSeconds": { "description": "Webserver Liveness probe timeout seconds.", "type": "integer", - "default": 30 + "default": 5 }, "failureThreshold": { "description": "Webserver Liveness probe failure threshold.", "type": "integer", - "default": 20 + "default": 5 }, "periodSeconds": { "description": "Webserver Liveness probe period seconds.", "type": "integer", - "default": 5 + "default": 10 }, "scheme": { "description": "Webserver Liveness probe scheme.", @@ -3243,17 +3247,17 @@ "timeoutSeconds": { "description": "Webserver Readiness probe timeout seconds.", "type": "integer", - "default": 30 + "default": 5 }, "failureThreshold": { "description": "Webserver Readiness probe failure threshold.", "type": "integer", - "default": 20 + "default": 5 }, "periodSeconds": { "description": "Webserver Readiness probe period seconds.", "type": "integer", - "default": 5 + "default": 10 }, "scheme": { "description": "Webserver Readiness probe scheme.", diff --git a/chart/values.yaml b/chart/values.yaml index 5a7613e96c543..af5221e40219e 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -958,16 +958,16 @@ webserver: allowPodLogReading: true livenessProbe: initialDelaySeconds: 15 - timeoutSeconds: 30 - failureThreshold: 20 - periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 5 + periodSeconds: 10 scheme: HTTP readinessProbe: initialDelaySeconds: 15 - timeoutSeconds: 30 - failureThreshold: 20 - periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 5 + periodSeconds: 10 scheme: HTTP # Number of webservers @@ -1357,6 +1357,23 @@ dagProcessor: podAnnotations: {} + logGroomerSidecar: + # Whether to deploy the Airflow dag processor log groomer sidecar. + enabled: true + # Command to use when running the Airflow dag processor log groomer sidecar (templated). + command: ~ + # Args to use when running the Airflow dag processor log groomer sidecar (templated). 
+ args: ["bash", "/clean-logs"] + # Number of days to retain logs + retentionDays: 15 + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + waitForMigrations: # Whether to create init container to wait for db migrations enabled: true diff --git a/codecov.yml b/codecov.yml index 9258dda0595dd..68d60fd0262c2 100644 --- a/codecov.yml +++ b/codecov.yml @@ -50,6 +50,8 @@ coverage: - v2-4-test - v2-5-stable - v2-5-test + - v2-6-stable + - v2-6-test if_not_found: success if_ci_failed: error informational: true @@ -75,6 +77,8 @@ coverage: - v2-4-test - v2-5-stable - v2-5-test + - v2-6-stable + - v2-6-test if_no_uploads: error if_not_found: success if_ci_failed: error diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md index c97fb93ab7cb9..ad48e3756e228 100644 --- a/dev/README_RELEASE_AIRFLOW.md +++ b/dev/README_RELEASE_AIRFLOW.md @@ -48,7 +48,7 @@ - [Close the milestone](#close-the-milestone) - [Close the testing status issue](#close-the-testing-status-issue) - [Announce the release on the community slack](#announce-the-release-on-the-community-slack) - - [Tweet about the release](#tweet-about-the-release) + - [Announce about the release in social media](#announce-about-the-release-in-social-media) - [Update `main` with the latest release details](#update-main-with-the-latest-release-details) - [Update default Airflow version in the helm chart](#update-default-airflow-version-in-the-helm-chart) - [Update airflow/config_templates/config.yml file](#update-airflowconfig_templatesconfigyml-file) @@ -801,9 +801,19 @@ Thanks to all the contributors who made this possible. EOF ``` -## Tweet about the release +## Announce about the release in social media -Tweet about the release: +------------------------------------------------------------------------------------------------------------ +Announcement is done from official Apache-Airflow accounts. + +* Twitter: https://twitter.com/ApacheAirflow +* Linkedin: https://www.linkedin.com/company/apache-airflow/ + +If you don't have access to the account ask PMC to post. + +------------------------------------------------------------------------------------------------------------ + +Tweet and post on Linkedin about the release: ```shell cat <=4.4; python_version < "3.8" jinja2 + packaging pendulum pre-commit psutil @@ -73,7 +74,6 @@ install_requires = twine wheel setuptools - jinja2 [options.packages.find] where=src diff --git a/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py b/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py index 961b642aa0e79..35e5405d9a98a 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py +++ b/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py @@ -262,6 +262,8 @@ def create_issue_for_testing(version, previous_version, github_token): def remove_old_releases(version, repo_root): + if confirm_action("In beta release we do not remove old RCs. 
Is this a beta release?"): + return if not confirm_action("Do you want to look for old RCs to remove?"): return @@ -301,18 +303,20 @@ def remove_old_releases(version, repo_root): ) @option_answer def publish_release_candidate(version, previous_version, github_token): - if "rc" not in version: - exit("Version must contain 'rc'") - if "rc" in previous_version: - exit("Previous version must not contain 'rc'") + from packaging.version import Version + + if not Version(version).is_prerelease: + exit("--version value must be a pre-release") + if Version(previous_version).is_prerelease: + exit("--previous-version value must be a release not a pre-release") if not github_token: github_token = os.environ.get("GITHUB_TOKEN") if not github_token: console_print("GITHUB_TOKEN is not set! Issue generation will fail.") confirm_action("Do you want to continue?", abort=True) - version_suffix = version[-3:] + version_suffix = version[5:] version_branch = version[:3].replace(".", "-") - version_without_rc = version[:-3] + version_without_rc = version[:5] os.chdir(AIRFLOW_SOURCES_ROOT) airflow_repo_root = os.getcwd() diff --git a/dev/breeze/src/airflow_breeze/commands/testing_commands.py b/dev/breeze/src/airflow_breeze/commands/testing_commands.py index b6ebe571f93cf..752764bd9bb41 100644 --- a/dev/breeze/src/airflow_breeze/commands/testing_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/testing_commands.py @@ -25,7 +25,11 @@ from click import IntRange from airflow_breeze.commands.ci_image_commands import rebuild_or_pull_ci_image_if_needed -from airflow_breeze.global_constants import ALLOWED_TEST_TYPE_CHOICES, all_selective_test_types +from airflow_breeze.global_constants import ( + ALLOWED_HELM_TEST_PACKAGES, + ALLOWED_TEST_TYPE_CHOICES, + all_selective_test_types, +) from airflow_breeze.params.build_prod_params import BuildProdParams from airflow_breeze.params.shell_params import ShellParams from airflow_breeze.utils.ci_group import ci_group @@ -51,7 +55,7 @@ option_verbose, ) from airflow_breeze.utils.console import Output, get_console -from airflow_breeze.utils.custom_param_types import NotVerifiedBetterChoice +from airflow_breeze.utils.custom_param_types import BetterChoice, NotVerifiedBetterChoice from airflow_breeze.utils.docker_command_utils import ( DOCKER_COMPOSE_COMMAND, get_env_variables_for_docker_commands, @@ -545,11 +549,18 @@ def integration_tests( @option_github_repository @option_verbose @option_dry_run +@click.option( + "--helm-test-package", + help="Package to tests", + default="all", + type=BetterChoice(ALLOWED_HELM_TEST_PACKAGES), +) @click.argument("extra_pytest_args", nargs=-1, type=click.UNPROCESSED) def helm_tests( extra_pytest_args: tuple, image_tag: str | None, mount_sources: str, + helm_test_package: str, github_repository: str, ): exec_shell_params = ShellParams( @@ -560,6 +571,8 @@ def helm_tests( env_variables = get_env_variables_for_docker_commands(exec_shell_params) env_variables["RUN_TESTS"] = "true" env_variables["TEST_TYPE"] = "Helm" + if helm_test_package != "all": + env_variables["HELM_TEST_PACKAGE"] = helm_test_package perform_environment_checks() cleanup_python_generated_files() cmd = [*DOCKER_COMPOSE_COMMAND, "run", "--service-ports", "--rm", "airflow"] diff --git a/dev/breeze/src/airflow_breeze/commands/testing_commands_config.py b/dev/breeze/src/airflow_breeze/commands/testing_commands_config.py index 80b5d60911332..5696510939a0f 100644 --- a/dev/breeze/src/airflow_breeze/commands/testing_commands_config.py +++ 
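
The publish_release_candidate change above swaps the literal "rc" substring checks for the packaging library (added to the breeze dependencies in setup.cfg), so alpha and beta tags such as 2.6.0b1 also count as pre-releases; the new version[5:] / version[:5] split assumes an X.Y.Z<suffix> layout, for example "2.6.0rc1" giving "rc1" and "2.6.0". A minimal sketch of the pre-release check, using example version strings:

```python
# Minimal sketch of the check used by publish_release_candidate above.
# packaging treats rc/beta/alpha suffixes as pre-releases per PEP 440.
from packaging.version import Version

for candidate in ("2.6.0rc1", "2.6.0b1", "2.6.0"):
    print(candidate, "pre-release:", Version(candidate).is_prerelease)
# 2.6.0rc1 pre-release: True
# 2.6.0b1 pre-release: True
# 2.6.0 pre-release: False
```
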
b/dev/breeze/src/airflow_breeze/commands/testing_commands_config.py @@ -88,6 +88,7 @@ "options": [ "--image-tag", "--mount-sources", + "--helm-test-package", "--github-repository", ], }, diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index 666e860244eab..22088120c843e 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -88,7 +88,7 @@ ALLOWED_MYSQL_VERSIONS = ["5.7", "8"] ALLOWED_MSSQL_VERSIONS = ["2017-latest", "2019-latest"] -PIP_VERSION = "23.0.1" +PIP_VERSION = "23.1" @lru_cache(maxsize=None) @@ -115,6 +115,23 @@ class SelectiveUnitTestTypes(Enum): "Quarantine", ] + +@lru_cache(maxsize=None) +def all_helm_test_packages() -> list[str]: + return sorted( + [ + candidate.name + for candidate in (AIRFLOW_SOURCES_ROOT / "tests" / "charts").iterdir() + if candidate.is_dir() + ] + ) + + +ALLOWED_HELM_TEST_PACKAGES = [ + "all", + *all_helm_test_packages(), +] + ALLOWED_PACKAGE_FORMATS = ["wheel", "sdist", "both"] ALLOWED_INSTALLATION_PACKAGE_FORMATS = ["wheel", "sdist"] ALLOWED_INSTALLATION_METHODS = [".", "apache-airflow"] diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py b/dev/breeze/src/airflow_breeze/utils/selective_checks.py index 6ce85c0f0aac6..36a85d56dc8bb 100644 --- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py +++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py @@ -58,6 +58,7 @@ KIND_VERSION, GithubEvents, SelectiveUnitTestTypes, + all_helm_test_packages, all_selective_test_types, ) from airflow_breeze.utils.console import get_console @@ -644,3 +645,7 @@ def debug_resources(self) -> bool: @cached_property def suspended_providers_folders(self) -> str: return " ".join(get_suspended_providers_folders()) + + @cached_property + def helm_test_packages(self) -> str: + return json.dumps(all_helm_test_packages()) diff --git a/docker_tests/test_prod_image.py b/docker_tests/test_prod_image.py index 99ddb4730fcdb..e76f04dfd88b3 100644 --- a/docker_tests/test_prod_image.py +++ b/docker_tests/test_prod_image.py @@ -20,6 +20,7 @@ import os import subprocess import tempfile +from importlib.util import find_spec from pathlib import Path import pytest @@ -161,7 +162,6 @@ def test_pip_dependencies_conflict(self): "grpc": ["grpc", "google.auth", "google_auth_httplib2"], "hashicorp": ["hvac"], "ldap": ["ldap"], - "mysql": ["mysql"], "postgres": ["psycopg2"], "pyodbc": ["pyodbc"], "redis": ["redis"], @@ -171,6 +171,8 @@ def test_pip_dependencies_conflict(self): "statsd": ["statsd"], "virtualenv": ["virtualenv"], } + if bool(find_spec("mysql")): + PACKAGE_IMPORTS["mysql"] = ["mysql"] @pytest.mark.skipif(os.environ.get("TEST_SLIM_IMAGE") == "true", reason="Skipped with slim image") @pytest.mark.parametrize("package_name,import_names", PACKAGE_IMPORTS.items()) diff --git a/docs/apache-airflow-providers-amazon/operators/_partials/prerequisite_tasks.rst b/docs/apache-airflow-providers-amazon/_partials/prerequisite_tasks.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/_partials/prerequisite_tasks.rst rename to docs/apache-airflow-providers-amazon/_partials/prerequisite_tasks.rst diff --git a/docs/apache-airflow-providers-amazon/index.rst b/docs/apache-airflow-providers-amazon/index.rst index 9abb85bb3c763..2bae9ea118f4e 100644 --- a/docs/apache-airflow-providers-amazon/index.rst +++ b/docs/apache-airflow-providers-amazon/index.rst @@ -27,6 +27,7 @@ Content Connection types Operators + Transfers 
Deferrable Operators Secrets backends Logging for Tasks diff --git a/docs/apache-airflow-providers-amazon/operators/appflow.rst b/docs/apache-airflow-providers-amazon/operators/appflow.rst index 65604ee97b7ab..8ef7e9c288fc6 100644 --- a/docs/apache-airflow-providers-amazon/operators/appflow.rst +++ b/docs/apache-airflow-providers-amazon/operators/appflow.rst @@ -33,7 +33,7 @@ AWS PrivateLink, reducing exposure to security threats. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/athena.rst b/docs/apache-airflow-providers-amazon/operators/athena.rst index 7e1d4e45e6b92..a47227988291c 100644 --- a/docs/apache-airflow-providers-amazon/operators/athena.rst +++ b/docs/apache-airflow-providers-amazon/operators/athena.rst @@ -28,7 +28,7 @@ to your data in S3, define the schema, and start querying using standard SQL. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/batch.rst b/docs/apache-airflow-providers-amazon/operators/batch.rst index ba280cb38d37e..bcfb86dbf73f6 100644 --- a/docs/apache-airflow-providers-amazon/operators/batch.rst +++ b/docs/apache-airflow-providers-amazon/operators/batch.rst @@ -27,7 +27,7 @@ infrastructure. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/cloudformation.rst b/docs/apache-airflow-providers-amazon/operators/cloudformation.rst index 6b2730c348192..4051be0ccd225 100644 --- a/docs/apache-airflow-providers-amazon/operators/cloudformation.rst +++ b/docs/apache-airflow-providers-amazon/operators/cloudformation.rst @@ -29,7 +29,7 @@ create and delete a collection of resources together as a single unit (a stack). Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/datasync.rst b/docs/apache-airflow-providers-amazon/operators/datasync.rst index b0b6100f4497b..aca6d5d755145 100644 --- a/docs/apache-airflow-providers-amazon/operators/datasync.rst +++ b/docs/apache-airflow-providers-amazon/operators/datasync.rst @@ -26,7 +26,7 @@ the internet or AWS Direct Connect. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/dms.rst b/docs/apache-airflow-providers-amazon/operators/dms.rst index c741735b866bd..3a9f38e72ad25 100644 --- a/docs/apache-airflow-providers-amazon/operators/dms.rst +++ b/docs/apache-airflow-providers-amazon/operators/dms.rst @@ -34,7 +34,7 @@ different database engines, such as from an Oracle database to a PostgreSQL data Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. 
include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/dynamodb.rst b/docs/apache-airflow-providers-amazon/operators/dynamodb.rst new file mode 100644 index 0000000000000..3333a6648daf4 --- /dev/null +++ b/docs/apache-airflow-providers-amazon/operators/dynamodb.rst @@ -0,0 +1,55 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +=============== +Amazon DynamoDB +=============== + +`Amazon DynamoDB `__ Amazon DynamoDB is a +fully managed, serverless, key-value NoSQL database designed to run +high-performance applications at any scale. DynamoDB offers built-in security, +continuous backups, automated multi-Region replication, in-memory caching, and +data import and export tools. + +Prerequisite Tasks +------------------ + +.. include:: ../_partials/prerequisite_tasks.rst + + +Sensors +------- + +.. _howto/sensor:DynamoDBValueSensor: + +Wait on Amazon DynamoDB item attribute value match +================================================== + +Use the :class:`~airflow.providers.amazon.aws.sensors.dynamodb.DynamoDBValueSensor` +to wait for the presence of a matching DynamoDB item's attribute/value pair. + +.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_dynamodb.py + :language: python + :start-after: [START howto_sensor_dynamodb] + :dedent: 4 + :end-before: [END howto_sensor_dynamodb] + + +Reference +--------- + +* `AWS boto3 library documentation for DynamoDB `__ diff --git a/docs/apache-airflow-providers-amazon/operators/ec2.rst b/docs/apache-airflow-providers-amazon/operators/ec2.rst index 5796c514e0238..2018d8113fcf9 100644 --- a/docs/apache-airflow-providers-amazon/operators/ec2.rst +++ b/docs/apache-airflow-providers-amazon/operators/ec2.rst @@ -15,9 +15,9 @@ specific language governing permissions and limitations under the License. -========================================= -Amazon Amazon Elastic Compute Cloud (EC2) -========================================= +================================== +Amazon Elastic Compute Cloud (EC2) +================================== `Amazon Elastic Compute Cloud (Amazon EC2) `__ is a web service that provides resizable computing capacity—literally, servers in Amazon's data centers—that you use to build and host your software systems. @@ -25,7 +25,7 @@ computing capacity—literally, servers in Amazon's data centers—that you use Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. 
include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/ecs.rst b/docs/apache-airflow-providers-amazon/operators/ecs.rst index 2ba584cb77fdf..d513485a9a990 100644 --- a/docs/apache-airflow-providers-amazon/operators/ecs.rst +++ b/docs/apache-airflow-providers-amazon/operators/ecs.rst @@ -28,7 +28,7 @@ Airflow provides operators to run Task Definitions on an ECS cluster. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/eks.rst b/docs/apache-airflow-providers-amazon/operators/eks.rst index 7861ad60948f3..e11b721be37b5 100644 --- a/docs/apache-airflow-providers-amazon/operators/eks.rst +++ b/docs/apache-airflow-providers-amazon/operators/eks.rst @@ -29,7 +29,7 @@ Airflow provides operators to create and interact with the EKS clusters and comp Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/emr.rst b/docs/apache-airflow-providers-amazon/operators/emr/emr.rst similarity index 99% rename from docs/apache-airflow-providers-amazon/operators/emr.rst rename to docs/apache-airflow-providers-amazon/operators/emr/emr.rst index 6597413700a8e..4b517dab8ff3b 100644 --- a/docs/apache-airflow-providers-amazon/operators/emr.rst +++ b/docs/apache-airflow-providers-amazon/operators/emr/emr.rst @@ -30,7 +30,7 @@ Storage Service (Amazon S3) and Amazon DynamoDB. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/emr_eks.rst b/docs/apache-airflow-providers-amazon/operators/emr/emr_eks.rst similarity index 99% rename from docs/apache-airflow-providers-amazon/operators/emr_eks.rst rename to docs/apache-airflow-providers-amazon/operators/emr/emr_eks.rst index ee2ee50ec13b1..dc31c8f984618 100644 --- a/docs/apache-airflow-providers-amazon/operators/emr_eks.rst +++ b/docs/apache-airflow-providers-amazon/operators/emr/emr_eks.rst @@ -26,7 +26,7 @@ Amazon EKS. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/emr_serverless.rst b/docs/apache-airflow-providers-amazon/operators/emr/emr_serverless.rst similarity index 98% rename from docs/apache-airflow-providers-amazon/operators/emr_serverless.rst rename to docs/apache-airflow-providers-amazon/operators/emr/emr_serverless.rst index 15b0c6de0f478..6a30f52825cb4 100644 --- a/docs/apache-airflow-providers-amazon/operators/emr_serverless.rst +++ b/docs/apache-airflow-providers-amazon/operators/emr/emr_serverless.rst @@ -29,7 +29,7 @@ and manage clusters. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/emr/index.rst b/docs/apache-airflow-providers-amazon/operators/emr/index.rst new file mode 100644 index 0000000000000..57d517d3b155d --- /dev/null +++ b/docs/apache-airflow-providers-amazon/operators/emr/index.rst @@ -0,0 +1,28 @@ + .. 
Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + + +Amazon EMR Operators +==================== + + +.. toctree:: + :maxdepth: 1 + :glob: + + * diff --git a/docs/apache-airflow-providers-amazon/operators/glue.rst b/docs/apache-airflow-providers-amazon/operators/glue.rst index 0690ec5b72397..e582e9d415b67 100644 --- a/docs/apache-airflow-providers-amazon/operators/glue.rst +++ b/docs/apache-airflow-providers-amazon/operators/glue.rst @@ -27,7 +27,7 @@ your data and putting it to use in minutes instead of months. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/index.rst b/docs/apache-airflow-providers-amazon/operators/index.rst index 428d3758a3649..7c7082148e2fd 100644 --- a/docs/apache-airflow-providers-amazon/operators/index.rst +++ b/docs/apache-airflow-providers-amazon/operators/index.rst @@ -25,5 +25,7 @@ Amazon AWS Operators :maxdepth: 1 :glob: + emr/index + redshift/index + s3/index * - transfer/index diff --git a/docs/apache-airflow-providers-amazon/operators/lambda.rst b/docs/apache-airflow-providers-amazon/operators/lambda.rst index 2ec14eb5ba4b2..79649f106fdee 100644 --- a/docs/apache-airflow-providers-amazon/operators/lambda.rst +++ b/docs/apache-airflow-providers-amazon/operators/lambda.rst @@ -28,7 +28,7 @@ You can set up your code to automatically trigger from other AWS services or cal Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/quicksight.rst b/docs/apache-airflow-providers-amazon/operators/quicksight.rst index 79c7dc500cf38..cbca98d7d59a4 100644 --- a/docs/apache-airflow-providers-amazon/operators/quicksight.rst +++ b/docs/apache-airflow-providers-amazon/operators/quicksight.rst @@ -28,7 +28,7 @@ QuickSight Super-fast, Parallel, In-Memory, Calculation Engine (SPICE). Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/rds.rst b/docs/apache-airflow-providers-amazon/operators/rds.rst index 1cd1cd40701dd..bca9c64af1e0e 100644 --- a/docs/apache-airflow-providers-amazon/operators/rds.rst +++ b/docs/apache-airflow-providers-amazon/operators/rds.rst @@ -27,7 +27,7 @@ common database administration tasks. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. 
include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/redshift/index.rst b/docs/apache-airflow-providers-amazon/operators/redshift/index.rst new file mode 100644 index 0000000000000..35b0542b8a423 --- /dev/null +++ b/docs/apache-airflow-providers-amazon/operators/redshift/index.rst @@ -0,0 +1,28 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + + +Amazon Redshift Operators +========================= + + +.. toctree:: + :maxdepth: 1 + :glob: + + * diff --git a/docs/apache-airflow-providers-amazon/operators/redshift_cluster.rst b/docs/apache-airflow-providers-amazon/operators/redshift/redshift_cluster.rst similarity index 97% rename from docs/apache-airflow-providers-amazon/operators/redshift_cluster.rst rename to docs/apache-airflow-providers-amazon/operators/redshift/redshift_cluster.rst index d61aa4acfcd9d..7c5d22486b39f 100644 --- a/docs/apache-airflow-providers-amazon/operators/redshift_cluster.rst +++ b/docs/apache-airflow-providers-amazon/operators/redshift/redshift_cluster.rst @@ -15,9 +15,9 @@ specific language governing permissions and limitations under the License. -=============== -Amazon Redshift -=============== +========================= +Amazon Redshift (Cluster) +========================= `Amazon Redshift `__ manages all the work of setting up, operating, and scaling a data warehouse: provisioning capacity, monitoring and backing up the cluster, and applying patches and upgrades to @@ -27,7 +27,7 @@ business and customers. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/redshift_data.rst b/docs/apache-airflow-providers-amazon/operators/redshift/redshift_data.rst similarity index 97% rename from docs/apache-airflow-providers-amazon/operators/redshift_data.rst rename to docs/apache-airflow-providers-amazon/operators/redshift/redshift_data.rst index 9ec14dbaeea19..782d0cf6992fb 100644 --- a/docs/apache-airflow-providers-amazon/operators/redshift_data.rst +++ b/docs/apache-airflow-providers-amazon/operators/redshift/redshift_data.rst @@ -27,7 +27,7 @@ business and customers. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. 
include:: ../../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/redshift_sql.rst b/docs/apache-airflow-providers-amazon/operators/redshift/redshift_sql.rst similarity index 95% rename from docs/apache-airflow-providers-amazon/operators/redshift_sql.rst rename to docs/apache-airflow-providers-amazon/operators/redshift/redshift_sql.rst index 17d43f4097391..1eb25939afcb5 100644 --- a/docs/apache-airflow-providers-amazon/operators/redshift_sql.rst +++ b/docs/apache-airflow-providers-amazon/operators/redshift/redshift_sql.rst @@ -17,9 +17,9 @@ .. _howto/operator:RedshiftSQLOperator: -=============== -Amazon Redshift -=============== +=================== +Amazon Redshift SQL +=================== `Amazon Redshift `__ manages all the work of setting up, operating, and scaling a data warehouse: provisioning capacity, monitoring and backing up the cluster, and applying patches and upgrades to @@ -29,7 +29,7 @@ business and customers. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/glacier.rst b/docs/apache-airflow-providers-amazon/operators/s3/glacier.rst similarity index 98% rename from docs/apache-airflow-providers-amazon/operators/glacier.rst rename to docs/apache-airflow-providers-amazon/operators/s3/glacier.rst index 3a8de01a452ab..9dca7a776cff8 100644 --- a/docs/apache-airflow-providers-amazon/operators/glacier.rst +++ b/docs/apache-airflow-providers-amazon/operators/s3/glacier.rst @@ -25,7 +25,7 @@ and extremely low-cost Amazon S3 cloud storage class for data archiving and long Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/s3/index.rst b/docs/apache-airflow-providers-amazon/operators/s3/index.rst new file mode 100644 index 0000000000000..f0d7f7d0871c3 --- /dev/null +++ b/docs/apache-airflow-providers-amazon/operators/s3/index.rst @@ -0,0 +1,28 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + + +Amazon S3 Operators +=================== + + +.. 
toctree:: + :maxdepth: 1 + :glob: + + * diff --git a/docs/apache-airflow-providers-amazon/operators/s3.rst b/docs/apache-airflow-providers-amazon/operators/s3/s3.rst similarity index 99% rename from docs/apache-airflow-providers-amazon/operators/s3.rst rename to docs/apache-airflow-providers-amazon/operators/s3/s3.rst index 3d594fcdcb2bb..ef9d0864490f9 100644 --- a/docs/apache-airflow-providers-amazon/operators/s3.rst +++ b/docs/apache-airflow-providers-amazon/operators/s3/s3.rst @@ -25,7 +25,7 @@ You can use Amazon S3 to store and retrieve any amount of data at any time, from Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/sagemaker.rst b/docs/apache-airflow-providers-amazon/operators/sagemaker.rst index 1aa82b6cdc697..933d162df8fa4 100644 --- a/docs/apache-airflow-providers-amazon/operators/sagemaker.rst +++ b/docs/apache-airflow-providers-amazon/operators/sagemaker.rst @@ -29,7 +29,7 @@ Airflow provides operators to create and interact with SageMaker Jobs and Pipeli Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/sns.rst b/docs/apache-airflow-providers-amazon/operators/sns.rst index caaa48452795d..903b8d7630b4a 100644 --- a/docs/apache-airflow-providers-amazon/operators/sns.rst +++ b/docs/apache-airflow-providers-amazon/operators/sns.rst @@ -30,7 +30,7 @@ messages (SMS). Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/sqs.rst b/docs/apache-airflow-providers-amazon/operators/sqs.rst index abe55163c97fa..13c806626ee65 100644 --- a/docs/apache-airflow-providers-amazon/operators/sqs.rst +++ b/docs/apache-airflow-providers-amazon/operators/sqs.rst @@ -15,9 +15,9 @@ specific language governing permissions and limitations under the License. -========== -Amazon SQS -========== +================================= +Amazon Simple Queue Service (SQS) +================================= `Amazon Simple Queue Service (SQS) `__ is a fully managed message queuing service that enables you to decouple and scale microservices, distributed systems, and serverless @@ -29,7 +29,7 @@ or requiring other services to be available. Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/operators/step_functions.rst b/docs/apache-airflow-providers-amazon/operators/step_functions.rst index 91984dc5986b6..1f207b4576f98 100644 --- a/docs/apache-airflow-providers-amazon/operators/step_functions.rst +++ b/docs/apache-airflow-providers-amazon/operators/step_functions.rst @@ -26,7 +26,7 @@ machines to execute the steps of your application in a reliable and scalable fas Prerequisite Tasks ------------------ -.. include:: _partials/prerequisite_tasks.rst +.. 
include:: ../_partials/prerequisite_tasks.rst Operators --------- diff --git a/docs/apache-airflow-providers-amazon/redirects.txt b/docs/apache-airflow-providers-amazon/redirects.txt index e8de475dd33f4..bf35801e962e9 100644 --- a/docs/apache-airflow-providers-amazon/redirects.txt +++ b/docs/apache-airflow-providers-amazon/redirects.txt @@ -15,8 +15,33 @@ # specific language governing permissions and limitations # under the License. -operators/s3_to_redshift.rst operators/transfer/s3_to_redshift.rst -operators/google_api_to_s3_transfer.rst operators/transfer/google_api_to_s3.rst -operators/imap_attachment_to_s3.rst operators/transfer/imap_attachment_to_s3.rst -operators/salesforce_to_s3.rst operators/transfer/salesforce_to_s3.rst +operators/s3_to_redshift.rst transfer/s3_to_redshift.rst +operators/google_api_to_s3_transfer.rst transfer/google_api_to_s3.rst +operators/imap_attachment_to_s3.rst transfer/imap_attachment_to_s3.rst +operators/salesforce_to_s3.rst transfer/salesforce_to_s3.rst operators/sqs_publish.rst operators/sqs.rst +operators/emr.rst operators/emr/emr.rst +operators/emr_eks.rst operators/emr/emr_eks.rst +operators/emr_serverless.rst operators/emr/emr_serverless.rst +operators/redshift_sql.rst operators/redshift/redshift_sql.rst +operators/redshift_data.rst operators/redshift/redshift_data.rst +operators/redshift_cluster.rst operators/redshift/redshift_cluster.rst +operators/s3.rst operators/s3/s3.rst +operators/glacier.rst operators/s3/glacier.rst +operators/transfer/dynamodb_to_s3.rst.rst transfer/dynamodb_to_s3.rst.rst +operators/transfer/ftp_to_s3.rst.rst transfer/ftp_to_s3.rst.rst +operators/transfer/gcs_to_s3.rst.rst transfer/gcs_to_s3.rst.rst +operators/transfer/glacier_to_gcs.rst transfer/glacier_to_gcs.rst +operators/transfer/google_api_to_s3.rst transfer/google_api_to_s3.rst +operators/transfer/hive_to_dynamodb.rst transfer/hive_to_dynamodb.rst +operators/transfer/imap_attachment_to_s3.rst transfer/imap_attachment_to_s3.rst +operators/transfer/local_to_s3.rst transfer/local_to_s3.rst +operators/transfer/mongo_to_s3.rst transfer/mongo_to_s3.rst +operators/transfer/redshift_to_s3.rst transfer/redshift_to_s3.rst +operators/transfer/s3_to_ftp.rst transfer/s3_to_ftp.rst +operators/transfer/s3_to_redshift.rst transfer/s3_to_redshift.rst +operators/transfer/s3_to_sftp.rst transfer/s3_to_sftp.rst +operators/transfer/s3_to_sql.rst transfer/s3_to_sql.rst +operators/transfer/salesforce_to_s3.rst transfer/salesforce_to_s3.rst +operators/transfer/sftp_to_s3.rst transfer/sftp_to_s3.rst +operators/transfer/sql_to_s3.rst transfer/sql_to_s3.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/dynamodb_to_s3.rst b/docs/apache-airflow-providers-amazon/transfer/dynamodb_to_s3.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/dynamodb_to_s3.rst rename to docs/apache-airflow-providers-amazon/transfer/dynamodb_to_s3.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/ftp_to_s3.rst b/docs/apache-airflow-providers-amazon/transfer/ftp_to_s3.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/ftp_to_s3.rst rename to docs/apache-airflow-providers-amazon/transfer/ftp_to_s3.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/gcs_to_s3.rst b/docs/apache-airflow-providers-amazon/transfer/gcs_to_s3.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/gcs_to_s3.rst rename to 
docs/apache-airflow-providers-amazon/transfer/gcs_to_s3.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/glacier_to_gcs.rst b/docs/apache-airflow-providers-amazon/transfer/glacier_to_gcs.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/glacier_to_gcs.rst rename to docs/apache-airflow-providers-amazon/transfer/glacier_to_gcs.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/google_api_to_s3.rst b/docs/apache-airflow-providers-amazon/transfer/google_api_to_s3.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/google_api_to_s3.rst rename to docs/apache-airflow-providers-amazon/transfer/google_api_to_s3.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/hive_to_dynamodb.rst b/docs/apache-airflow-providers-amazon/transfer/hive_to_dynamodb.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/hive_to_dynamodb.rst rename to docs/apache-airflow-providers-amazon/transfer/hive_to_dynamodb.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/imap_attachment_to_s3.rst b/docs/apache-airflow-providers-amazon/transfer/imap_attachment_to_s3.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/imap_attachment_to_s3.rst rename to docs/apache-airflow-providers-amazon/transfer/imap_attachment_to_s3.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/index.rst b/docs/apache-airflow-providers-amazon/transfer/index.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/index.rst rename to docs/apache-airflow-providers-amazon/transfer/index.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/local_to_s3.rst b/docs/apache-airflow-providers-amazon/transfer/local_to_s3.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/local_to_s3.rst rename to docs/apache-airflow-providers-amazon/transfer/local_to_s3.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/mongo_to_s3.rst b/docs/apache-airflow-providers-amazon/transfer/mongo_to_s3.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/mongo_to_s3.rst rename to docs/apache-airflow-providers-amazon/transfer/mongo_to_s3.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/redshift_to_s3.rst b/docs/apache-airflow-providers-amazon/transfer/redshift_to_s3.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/redshift_to_s3.rst rename to docs/apache-airflow-providers-amazon/transfer/redshift_to_s3.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/s3_to_ftp.rst b/docs/apache-airflow-providers-amazon/transfer/s3_to_ftp.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/s3_to_ftp.rst rename to docs/apache-airflow-providers-amazon/transfer/s3_to_ftp.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/s3_to_redshift.rst b/docs/apache-airflow-providers-amazon/transfer/s3_to_redshift.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/s3_to_redshift.rst rename to docs/apache-airflow-providers-amazon/transfer/s3_to_redshift.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/s3_to_sftp.rst 
b/docs/apache-airflow-providers-amazon/transfer/s3_to_sftp.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/s3_to_sftp.rst rename to docs/apache-airflow-providers-amazon/transfer/s3_to_sftp.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/s3_to_sql.rst b/docs/apache-airflow-providers-amazon/transfer/s3_to_sql.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/s3_to_sql.rst rename to docs/apache-airflow-providers-amazon/transfer/s3_to_sql.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/salesforce_to_s3.rst b/docs/apache-airflow-providers-amazon/transfer/salesforce_to_s3.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/salesforce_to_s3.rst rename to docs/apache-airflow-providers-amazon/transfer/salesforce_to_s3.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/sftp_to_s3.rst b/docs/apache-airflow-providers-amazon/transfer/sftp_to_s3.rst similarity index 100% rename from docs/apache-airflow-providers-amazon/operators/transfer/sftp_to_s3.rst rename to docs/apache-airflow-providers-amazon/transfer/sftp_to_s3.rst diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/sql_to_s3.rst b/docs/apache-airflow-providers-amazon/transfer/sql_to_s3.rst similarity index 78% rename from docs/apache-airflow-providers-amazon/operators/transfer/sql_to_s3.rst rename to docs/apache-airflow-providers-amazon/transfer/sql_to_s3.rst index 31223be0f67aa..e01d415942934 100644 --- a/docs/apache-airflow-providers-amazon/operators/transfer/sql_to_s3.rst +++ b/docs/apache-airflow-providers-amazon/transfer/sql_to_s3.rst @@ -50,6 +50,19 @@ Example usage: :start-after: [START howto_transfer_sql_to_s3] :end-before: [END howto_transfer_sql_to_s3] +Grouping +-------- + +We can group the data in the table by passing the ``groupby_kwargs`` param. This param accepts a ``dict`` which will be passed to pandas `groupby() `_ as kwargs. + +Example usage: + +.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_sql_to_s3.py + :language: python + :dedent: 4 + :start-after: [START howto_transfer_sql_to_s3_with_groupby_param] + :end-before: [END howto_transfer_sql_to_s3_with_groupby_param] + Reference --------- diff --git a/docs/apache-airflow-providers-google/operators/cloud/dataform.rst b/docs/apache-airflow-providers-google/operators/cloud/dataform.rst index db8e0f737651d..37bd4c8ba10d8 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/dataform.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/dataform.rst @@ -25,14 +25,14 @@ Dataform lets you manage data transformation in the Extraction, Loading, and Tra for data integration. After raw data is extracted from source systems and loaded into BigQuery, Dataform helps you to transform it into a well-defined, tested, and documented suite of data tables. 
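Editorial aside on the ``sql_to_s3`` documentation change above (the new ``groupby_kwargs`` section): a minimal, hypothetical sketch of how that parameter might be passed. The connection ID, query, bucket, and key below are made-up placeholders; only the ``groupby_kwargs`` parameter itself comes from the documented change, and per that change it is forwarded to pandas ``groupby()`` as keyword arguments.

.. code-block:: python

    from airflow.providers.amazon.aws.transfers.sql_to_s3 import SqlToS3Operator

    sql_to_s3_grouped = SqlToS3Operator(
        task_id="sql_to_s3_grouped",
        sql_conn_id="my_sql_conn",  # placeholder connection ID
        query="SELECT color, price FROM cars",  # placeholder query
        s3_bucket="my-example-bucket",  # placeholder bucket
        s3_key="grouped/cars.csv",
        replace=True,
        groupby_kwargs={"by": "color"},  # passed to pandas DataFrame.groupby() as kwargs
    )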
-For more information about the task visit `Dataform production documentation `__ +For more information about the task visit `Dataform production documentation `__ Configuration ------------- Before you can use the Dataform operators you need to initialize repository and workspace, for more information -about this visit `Dataform Production documentation `__ +about this visit `Dataform Production documentation `__ Create Repository ----------------- diff --git a/docs/apache-airflow-providers-google/operators/marketing_platform/campaign_manager.rst b/docs/apache-airflow-providers-google/operators/marketing_platform/campaign_manager.rst index 4305962399d34..2c01bd95f084f 100644 --- a/docs/apache-airflow-providers-google/operators/marketing_platform/campaign_manager.rst +++ b/docs/apache-airflow-providers-google/operators/marketing_platform/campaign_manager.rst @@ -19,8 +19,8 @@ Google Campaign Manager Operators ================================= Google Campaign Manager operators allow you to insert, run, get or delete -reports. For more information about the Campaign Manager API check -`official documentation `__. +reports. For more information about the Campaign Manager report API check +`official documentation `__. Prerequisite Tasks ^^^^^^^^^^^^^^^^^^ @@ -124,7 +124,7 @@ The result is saved to :ref:`XCom `, which allows it to be used b .. _howto/operator:GoogleCampaignManagerBatchInsertConversionsOperator: -Inserting a conversions +Inserting conversions ^^^^^^^^^^^^^^^^^^^^^^^ To insert Campaign Manager conversions you can use the @@ -143,7 +143,7 @@ The result is saved to :ref:`XCom `, which allows it to be used b .. _howto/operator:GoogleCampaignManagerBatchUpdateConversionsOperator: -Updating a conversions +Updating conversions ^^^^^^^^^^^^^^^^^^^^^^ To update Campaign Manager conversions you can use the diff --git a/docs/apache-airflow-providers-google/operators/marketing_platform/display_video.rst b/docs/apache-airflow-providers-google/operators/marketing_platform/display_video.rst index c0e225f86e462..8a834c885b314 100644 --- a/docs/apache-airflow-providers-google/operators/marketing_platform/display_video.rst +++ b/docs/apache-airflow-providers-google/operators/marketing_platform/display_video.rst @@ -27,29 +27,6 @@ Prerequisite Tasks .. _howto/operator:GoogleDisplayVideo360CreateReportOperator: -Creating a report -^^^^^^^^^^^^^^^^^ - -This Operator is Deprecated and will be removed soon. Please look at ``GoogleDisplayVideo360CreateQueryOperator``. - -To create Display&Video 360 report use -:class:`~airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360CreateReportOperator`. - -.. exampleinclude:: /../../airflow/providers/google/marketing_platform/example_dags/example_display_video.py - :language: python - :dedent: 4 - :start-after: [START howto_google_display_video_createquery_report_operator] - :end-before: [END howto_google_display_video_createquery_report_operator] - -Use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360CreateReportOperator` -parameters which allow you to dynamically determine values. You can provide body definition using `` -.json`` file as this operator supports this template extension. -The result is saved to :ref:`XCom `, which allows the result to be used by other operators. - -.. 
_howto/operator:GoogleDisplayVideo360DeleteReportOperator: - - Creating a Query ^^^^^^^^^^^^^^^^ @@ -86,7 +63,7 @@ You can use :ref:`Jinja templating ` with parameters which allow you to dynamically determine values. The result is saved to :ref:`XCom `, which allows the result to be used by other operators. -.. _howto/operator:GoogleDisplayVideo360RunQuerySensor: +.. _howto/operator:GoogleDisplayVideo360DeleteReportOperator: Deleting a report ^^^^^^^^^^^^^^^^^ @@ -97,35 +74,14 @@ To delete Display&Video 360 report use .. exampleinclude:: /../../airflow/providers/google/marketing_platform/example_dags/example_display_video.py :language: python :dedent: 4 - :start-after: [START howto_google_display_video_deletequery_report_operator] - :end-before: [END howto_google_display_video_deletequery_report_operator] + :start-after: [START howto_google_display_video_delete_query_report_operator] + :end-before: [END howto_google_display_video_delete_query_report_operator] You can use :ref:`Jinja templating ` with :template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360DeleteReportOperator` parameters which allow you to dynamically determine values. -.. _howto/operator:GoogleDisplayVideo360ReportSensor: - -Waiting for report -^^^^^^^^^^^^^^^^^^ - -This Operator is Deprecated and will be removed soon. Please look: -.. _howto/operators:GoogleDisplayVideo360RunQuerySensor - -To wait for the report use -:class:`~airflow.providers.google.marketing_platform.sensors.display_video.GoogleDisplayVideo360ReportSensor`. - -.. exampleinclude:: /../../airflow/providers/google/marketing_platform/example_dags/example_display_video.py - :language: python - :dedent: 4 - :start-after: [START howto_google_display_video_wait_report_operator] - :end-before: [END howto_google_display_video_wait_report_operator] - -Use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.marketing_platform.sensors.display_video.GoogleDisplayVideo360ReportSensor` -parameters which allow you to dynamically determine values. - -.. _howto/operator:GoogleDisplayVideo360DownloadReportOperator: +.. _howto/operator:GoogleDisplayVideo360RunQuerySensor: Waiting for query ^^^^^^^^^^^^^^^^^ @@ -148,45 +104,19 @@ parameters which allow you to dynamically determine values. Downloading a report ^^^^^^^^^^^^^^^^^^^^ -This Operator is Deprecated and will be removed soon. Please look: -.. _howto/operators:GoogleDisplayVideo360DownloadReportV2Operator - To download a report to GCS bucket use -:class:`~airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360DownloadReportOperator`. +:class:`~airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360DownloadReportV2Operator`. .. 
exampleinclude:: /../../airflow/providers/google/marketing_platform/example_dags/example_display_video.py :language: python :dedent: 4 - :start-after: [START howto_google_display_video_getquery_report_operator] - :end-before: [END howto_google_display_video_getquery_report_operator] + :start-after: [START howto_google_display_video_get_report_operator] + :end-before: [END howto_google_display_video_get_report_operator] Use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360DownloadReportOperator` +:template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360DownloadReportV2Operator` parameters which allow you to dynamically determine values. - -.. _howto/operator:GoogleDisplayVideo360RunReportOperator: - -Running a report -^^^^^^^^^^^^^^^^ - -This Operator is Deprecated and will be removed soon. Please look: -.. _howto/operators:GoogleDisplayVideo360RunQueryOperator - -To run Display&Video 360 report use -:class:`~airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360RunReportOperator`. - -.. exampleinclude:: /../../airflow/providers/google/marketing_platform/example_dags/example_display_video.py - :language: python - :dedent: 4 - :start-after: [START howto_google_display_video_runquery_report_operator] - :end-before: [END howto_google_display_video_runquery_report_operator] - -Use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.marketing_platform.operators.display_video.GoogleDisplayVideo360RunReportOperator` -parameters which allow you to dynamically determine values. - - .. _howto/operator:GoogleDisplayVideo360DownloadLineItemsOperator: Downloading Line Items diff --git a/docs/apache-airflow-providers-openlineage/commits.rst b/docs/apache-airflow-providers-openlineage/commits.rst new file mode 100644 index 0000000000000..e071470a5a0f5 --- /dev/null +++ b/docs/apache-airflow-providers-openlineage/commits.rst @@ -0,0 +1,27 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Package apache-airflow-providers-openlineage +-------------------------------------------- + +`OpenLineage Provider `__ + + +This is detailed commit list of changes for versions provider package: ``openlineage``. +For high-level changelog, see :doc:`package information including changelog `. diff --git a/docs/apache-airflow-providers-openlineage/guides/developer.rst b/docs/apache-airflow-providers-openlineage/guides/developer.rst new file mode 100644 index 0000000000000..0e1d959c1e010 --- /dev/null +++ b/docs/apache-airflow-providers-openlineage/guides/developer.rst @@ -0,0 +1,53 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Implementing OpenLineage in Operators +------------------------------------- + +OpenLineage defines a few methods for Operators to implement. + +.. code-block:: python + + def get_openlineage_facets_on_start() -> OperatorLineage: + ... + + + def get_openlineage_facets_on_complete(ti: TaskInstance) -> OperatorLineage: + ... + + + def get_openlineage_facets_on_failure(ti: TaskInstance) -> OperatorLineage: + ... + +These are called when the task instance changes state to RUNNING, SUCCESS and FAILED, respectively. +Implementing the ``on_start`` method is required. +If there is no ``on_complete`` or ``on_failure`` method, ``on_start`` is called instead. + +Instead of returning a complete OpenLineage event, the provider defines an ``OperatorLineage`` structure for Operators to return: + +.. code-block:: python + + @define + class OperatorLineage: + inputs: list[Dataset] = Factory(list) + outputs: list[Dataset] = Factory(list) + run_facets: dict[str, BaseFacet] = Factory(dict) + job_facets: dict[str, BaseFacet] = Factory(dict) + +The OpenLineage integration takes care of enriching it with general Airflow facets, the proper event time, and the event type. diff --git a/docs/apache-airflow-providers-openlineage/guides/structure.rst b/docs/apache-airflow-providers-openlineage/guides/structure.rst new file mode 100644 index 0000000000000..c91debd940be8 --- /dev/null +++ b/docs/apache-airflow-providers-openlineage/guides/structure.rst @@ -0,0 +1,32 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Structure of OpenLineage Airflow integration +-------------------------------------------- + +The OpenLineage integration implements an AirflowPlugin. This allows it to be discovered on Airflow start and +to register an Airflow Listener. + +The listener is then called when certain events happen in Airflow - when DAGs or TaskInstances start, complete or fail. +For DAGs, the listener runs in the Airflow Scheduler. +For TaskInstances, the listener runs on the Airflow Worker. + +When a TaskInstance listener method is called, the ``OpenLineageListener`` constructs metadata such as the event's unique ``run_id`` and event time. 
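Side note for readers of the new developer guide above: a minimal, hypothetical operator sketch showing where ``get_openlineage_facets_on_start`` fits. The import paths for ``Dataset`` and ``OperatorLineage`` are assumptions rather than something stated in this change; only the method name and the ``OperatorLineage`` fields come from the guide itself.

.. code-block:: python

    from openlineage.client.run import Dataset  # assumed import path

    from airflow.models.baseoperator import BaseOperator
    from airflow.providers.openlineage.extractors import OperatorLineage  # assumed import path


    class CopyFileOperator(BaseOperator):
        """Toy operator that copies one URI to another and reports lineage."""

        def __init__(self, *, source_uri: str, target_uri: str, **kwargs):
            super().__init__(**kwargs)
            self.source_uri = source_uri
            self.target_uri = target_uri

        def execute(self, context):
            ...  # the actual copy would happen here

        def get_openlineage_facets_on_start(self) -> OperatorLineage:
            # Called when the task instance goes to RUNNING; reused for SUCCESS/FAILED
            # when the on_complete/on_failure variants are not implemented.
            return OperatorLineage(
                inputs=[Dataset(namespace="example", name=self.source_uri)],
                outputs=[Dataset(namespace="example", name=self.target_uri)],
            )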
+Then, it tries to find a valid Extractor for the given operator. The Extractors are a framework +for external extraction of metadata from Operators. diff --git a/docs/apache-airflow-providers-openlineage/guides/user.rst b/docs/apache-airflow-providers-openlineage/guides/user.rst new file mode 100644 index 0000000000000..8aef3f17c683c --- /dev/null +++ b/docs/apache-airflow-providers-openlineage/guides/user.rst @@ -0,0 +1,38 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Using OpenLineage integration +----------------------------- + +Install +======= + +To use the OpenLineage integration, install the ``apache-airflow-providers-openlineage`` package. + +Config +====== + +The primary method of configuring the OpenLineage Airflow Provider is Airflow configuration. + +One thing that needs to be set up in every case is the ``Transport`` - where you want +your events to end up. + +Another configuration option is an ``openlineage.yml`` file. +A detailed description of that configuration method is in the OpenLineage docs: +https://openlineage.io/docs/client/python#configuration diff --git a/docs/apache-airflow-providers-openlineage/index.rst b/docs/apache-airflow-providers-openlineage/index.rst new file mode 100644 index 0000000000000..e54f3a1fca91b --- /dev/null +++ b/docs/apache-airflow-providers-openlineage/index.rst @@ -0,0 +1,102 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +``apache-airflow-providers-openlineage`` +======================================== + +Content +------- + +.. toctree:: + :maxdepth: 1 + :caption: User guide + + Guides + +.. toctree:: + :maxdepth: 1 + :caption: Developer guide + + Guides + +.. toctree:: + :maxdepth: 1 + :caption: Structure of provider + + Guides + +.. toctree:: + :maxdepth: 1 + :caption: Macros + + Macros + +.. toctree:: + :maxdepth: 1 + :caption: References + + Python API <_api/airflow/providers/openlineage/index> + +.. toctree:: + :maxdepth: 1 + :caption: Resources + + PyPI Repository + Installing from sources + +.. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME! + + +.. 
toctree:: + :maxdepth: 1 + :caption: Commits + + Detailed list of commits + + +Package apache-airflow-providers-openlineage +-------------------------------------------- + +`OpenLineage `__ + + +Release: 1.0.0 + +Provider package +---------------- + +This is a provider package for the ``openlineage`` provider. All classes for this provider package +are in the ``airflow.providers.openlineage`` Python package. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-openlineage`` + +Requirements +------------ + +================== ================== +PIP package Version required +================== ================== +``apache-airflow`` ``>=2.6.0`` +================== ================== + +.. include:: ../../airflow/providers/openlineage/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-openlineage/installing-providers-from-sources.rst b/docs/apache-airflow-providers-openlineage/installing-providers-from-sources.rst new file mode 100644 index 0000000000000..1c90205d15b3a --- /dev/null +++ b/docs/apache-airflow-providers-openlineage/installing-providers-from-sources.rst @@ -0,0 +1,18 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: ../installing-providers-from-sources.rst diff --git a/docs/apache-airflow-providers-openlineage/macros.rst b/docs/apache-airflow-providers-openlineage/macros.rst new file mode 100644 index 0000000000000..0aed55feb9330 --- /dev/null +++ b/docs/apache-airflow-providers-openlineage/macros.rst @@ -0,0 +1,47 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. _howto/macros:openlineage: + +OpenLineage Macros +================== + +Invoke as a Jinja template, e.g. + +Lineage run id +-------------- +.. 
code-block:: python + + PythonOperator( + task_id="render_template", + python_callable=my_task_function, + op_args=["{{ lineage_run_id(task, task_instance) }}"], # lineage_run_id macro invoked + provide_context=False, + dag=dag, + ) + +Lineage parent id +----------------- +.. code-block:: python + + PythonOperator( + task_id="render_template", + python_callable=my_task_function, + op_args=["{{ lineage_parent_id(run_id, task_instance) }}"], # macro invoked + provide_context=False, + dag=dag, + ) diff --git a/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst b/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst index 650e31bdb070c..3d00ebbae93c3 100644 --- a/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst +++ b/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst @@ -143,7 +143,6 @@ Name Description ``dag_processing.file_path_queue_size`` Number of DAG files to be considered for the next scan ``dag_processing.last_run.seconds_ago.`` Seconds since ```` was last processed ``dag_processing.file_path_queue_size`` Size of the dag file queue. -``scheduler.tasks.running`` Number of tasks running in executor ``scheduler.tasks.starving`` Number of tasks that cannot be scheduled because of no open slot in pool ``scheduler.tasks.executable`` Number of tasks that are ready for execution (set to queued) with respect to pool limits, DAG concurrency, executor state, diff --git a/docs/apache-airflow/administration-and-deployment/security/audit_logs.rst b/docs/apache-airflow/administration-and-deployment/security/audit_logs.rst new file mode 100644 index 0000000000000..33cec373c3f8b --- /dev/null +++ b/docs/apache-airflow/administration-and-deployment/security/audit_logs.rst @@ -0,0 +1,64 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Audit Logs in Airflow +======================== + + +Overview +--------- + +Audit logs are a critical component of any system that needs to maintain a high level of security and compliance. +They provide a way to track user actions and system events, which can be used to troubleshoot issues, detect security breaches, and ensure regulatory compliance. + +In Airflow, audit logs are used to track user actions and system events that occur during the execution of DAGs and tasks. +They are stored in a database and can be accessed through the Airflow UI. + + +Level of Audit Logs +-------------------- + +Audit logs exist at the task level and the user level. + +- Task Level: At the task level, audit logs capture information related to the execution of a task, such as the start time, end time, and status of the task. 
+ +- User Level: At the user level, audit logs capture information related to user actions, such as creating, modifying, or deleting a DAG or task. + + +Location of Audit Logs +---------------------- + +Audit logs can be accessed through the Airflow UI. They are located under the "Admin" tab, and can be viewed by selecting "Audit Logs" from the dropdown menu. + + +Types of Events +--------------- + +Airflow provides a set of predefined events that can be tracked in audit logs. These events include: + +- ``action_trigger_dag``: Triggering a DAG +- ``action_create``: Creating a DAG or task +- ``action_edit``: Modifying a DAG or task +- ``action_delete``: Deleting a DAG or task +- ``action_failed``: Setting a task as failed +- ``action_success``: Setting a task as successful +- ``action_retry``: Retrying a failed task +- ``action_clear``: Clearing a task's state + + +In addition to these predefined events, Airflow allows you to define custom events that can be tracked in audit logs. +This can be done by calling the ``log`` method of the ``TaskInstance`` object. diff --git a/docs/apache-airflow/authoring-and-scheduling/datasets.rst b/docs/apache-airflow/authoring-and-scheduling/datasets.rst index 8fbff61a836a2..605635a4e1d08 100644 --- a/docs/apache-airflow/authoring-and-scheduling/datasets.rst +++ b/docs/apache-airflow/authoring-and-scheduling/datasets.rst @@ -27,7 +27,7 @@ In addition to scheduling DAGs based upon time, they can also be scheduled based .. code-block:: python - from airflow import Dataset + from airflow.datasets import Dataset with DAG(...): MyOperator( @@ -57,7 +57,7 @@ A dataset is defined by a Uniform Resource Identifier (URI): .. code-block:: python - from airflow import Dataset + from airflow.datasets import Dataset example_dataset = Dataset("s3://dataset-bucket/example.csv") diff --git a/docs/apache-airflow/extra-packages-ref.rst b/docs/apache-airflow/extra-packages-ref.rst index 24b78af6c4a62..6cf1d6a718763 100644 --- a/docs/apache-airflow/extra-packages-ref.rst +++ b/docs/apache-airflow/extra-packages-ref.rst @@ -294,6 +294,8 @@ These are extras that provide support for integration with external systems via +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | microsoft.winrm | ``pip install 'apache-airflow[microsoft.winrm]'`` | WinRM hooks and operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ +| openlineage | ``pip install 'apache-airflow[openlineage]'`` | Sending OpenLineage events | | ++---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | papermill | ``pip install 'apache-airflow[papermill]'`` | Papermill hooks and operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | sftp | ``pip install 'apache-airflow[sftp]'`` | SFTP hooks, operators and sensors | | diff --git a/docs/apache-airflow/howto/docker-compose/docker-compose.yaml b/docs/apache-airflow/howto/docker-compose/docker-compose.yaml index 719fffe8ab253..097b472f05b61 100644 --- a/docs/apache-airflow/howto/docker-compose/docker-compose.yaml +++ b/docs/apache-airflow/howto/docker-compose/docker-compose.yaml @@ -75,6 +75,7 @@ x-airflow-common: volumes: - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs + - 
${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins user: "${AIRFLOW_UID:-50000}:0" depends_on: diff --git a/docs/apache-airflow/howto/docker-compose/index.rst b/docs/apache-airflow/howto/docker-compose/index.rst index 791324e8b1dfa..b18f54097daf9 100644 --- a/docs/apache-airflow/howto/docker-compose/index.rst +++ b/docs/apache-airflow/howto/docker-compose/index.rst @@ -101,6 +101,7 @@ Some directories in the container are mounted, which means that their contents a - ``./dags`` - you can put your DAG files here. - ``./logs`` - contains logs from task execution and scheduler. +- ``./config`` - you can add custom log parser or add ``airflow_local_settings.py`` to configure cluster policy. - ``./plugins`` - you can put your :doc:`custom plugins ` here. This file uses the latest Airflow image (`apache/airflow `__). @@ -124,7 +125,7 @@ You have to make sure to configure them for the docker-compose: .. code-block:: bash - mkdir -p ./dags ./logs ./plugins + mkdir -p ./dags ./logs ./plugins ./config echo -e "AIRFLOW_UID=$(id -u)" > .env See :ref:`Docker Compose environment variables ` diff --git a/docs/apache-airflow/howto/operator/python.rst b/docs/apache-airflow/howto/operator/python.rst index 6e4410bb9be24..e2bf4b11e5ba3 100644 --- a/docs/apache-airflow/howto/operator/python.rst +++ b/docs/apache-airflow/howto/operator/python.rst @@ -70,7 +70,7 @@ PythonVirtualenvOperator ======================== Use the ``@task.virtualenv`` decorator to execute Python callables inside a new Python virtual environment. -The ``virtualenv`` package needs to be installed in the environment that runs Airflow (as optional dependency ``pip install airflow[virtualenv] --constraint ...``). +The ``virtualenv`` package needs to be installed in the environment that runs Airflow (as optional dependency ``pip install apache-airflow[virtualenv] --constraint ...``). .. warning:: The ``@task.virtualenv`` decorator is recommended over the classic :class:`~airflow.operators.python.PythonVirtualenvOperator` diff --git a/docs/apache-airflow/howto/set-up-database.rst b/docs/apache-airflow/howto/set-up-database.rst index c33f9ac0d9f29..b9041c8e28c1e 100644 --- a/docs/apache-airflow/howto/set-up-database.rst +++ b/docs/apache-airflow/howto/set-up-database.rst @@ -299,7 +299,13 @@ We recommend using the ``mysqlclient`` driver and specifying it in your SqlAlche mysql+mysqldb://:@[:]/ We also support the ``mysql-connector-python`` driver, which lets you connect through SSL -without any cert options provided. +without any cert options provided. If you wish to use ``mysql-connector-python`` driver, please install it with extras. + +.. code-block:: text + + $ pip install mysql-connector-python + +The connection string in this case should look like: .. 
code-block:: text diff --git a/docs/conf.py b/docs/conf.py index 56ea29b6ce2c2..fb6c82c25bd93 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -403,6 +403,10 @@ def _get_rst_filepath_from_path(filepath: pathlib.Path): ) in AirflowConfigParser.deprecated_options.items(): deprecated_options[deprecated_section][deprecated_key] = section, key, since_version + for (section, key), deprecated in AirflowConfigParser.many_to_one_deprecated_options.items(): + for deprecated_section, deprecated_key, since_version in deprecated: + deprecated_options[deprecated_section][deprecated_key] = section, key, since_version + configs = default_config_yaml() # We want the default/example we show in the docs to reflect the value _after_ diff --git a/docs/docker-stack/build-arg-ref.rst b/docs/docker-stack/build-arg-ref.rst index a3f959e6d4667..74ce10b942457 100644 --- a/docs/docker-stack/build-arg-ref.rst +++ b/docs/docker-stack/build-arg-ref.rst @@ -45,7 +45,7 @@ Those are the most common arguments that you use when you want to build a custom +------------------------------------------+------------------------------------------+---------------------------------------------+ | ``AIRFLOW_USER_HOME_DIR`` | ``/home/airflow`` | Home directory of the Airflow user. | +------------------------------------------+------------------------------------------+---------------------------------------------+ -| ``AIRFLOW_PIP_VERSION`` | ``23.0.1`` | PIP version used. | +| ``AIRFLOW_PIP_VERSION`` | ``23.1`` | PIP version used. | +------------------------------------------+------------------------------------------+---------------------------------------------+ | ``ADDITIONAL_PIP_INSTALL_FLAGS`` | | additional ``pip`` flags passed to the | | | | installation commands (except when | diff --git a/docs/exts/docs_build/helm_chart_utils.py b/docs/exts/docs_build/helm_chart_utils.py index e7db194790e9c..40c33cb2b484a 100644 --- a/docs/exts/docs_build/helm_chart_utils.py +++ b/docs/exts/docs_build/helm_chart_utils.py @@ -17,10 +17,11 @@ from __future__ import annotations import os +from pathlib import Path import yaml -CHART_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir, "chart")) +CHART_DIR = Path(__file__).resolve().parents[2] / "chart" CHART_YAML_PATH = os.path.join(CHART_DIR, "Chart.yaml") diff --git a/docs/integration-logos/openlineage/openlineage.svg b/docs/integration-logos/openlineage/openlineage.svg new file mode 100644 index 0000000000000..78b907000b47e --- /dev/null +++ b/docs/integration-logos/openlineage/openlineage.svg @@ -0,0 +1,47 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index d7074f4cfd28c..3ffe00f60994d 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -320,6 +320,7 @@ customDataImportUids customizability customizable customizations +cwd cx Daemonize daemonize @@ -504,6 +505,7 @@ emr EmrAddSteps EmrCreateJobFlow enableAutoScale +encodable encryptor enqueue enqueued @@ -1020,6 +1022,7 @@ OnFailure onkill Oozie openfaas +openlineage oper Opsgenie opsgenie @@ -1547,6 +1550,7 @@ undead Undeads ungenerated unicode +unindent unittest unittests unix diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index 0b19116b29ddf..3995871dc43f4 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -520,7 +520,6 @@ "deps": [ 
"apache-airflow-providers-common-sql>=1.3.1", "apache-airflow>=2.3.0", - "mysql-connector-python>=8.0.11", "mysqlclient>=1.3.6" ], "cross-providers-deps": [ @@ -554,6 +553,16 @@ ], "cross-providers-deps": [] }, + "openlineage": { + "deps": [ + "apache-airflow-providers-common-sql>=1.3.1", + "apache-airflow>=2.5.2", + "attrs>=22.2", + "openlineage-integration-common>=0.22.0", + "openlineage-python>=0.22.0" + ], + "cross-providers-deps": [] + }, "opsgenie": { "deps": [ "apache-airflow>=2.3.0", diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt index 615626a449d76..7a36b71736c6c 100644 --- a/images/breeze/output-commands-hash.txt +++ b/images/breeze/output-commands-hash.txt @@ -2,7 +2,7 @@ # Please do not solve it but run `breeze setup regenerate-command-images`. # This command should fix the conflict and regenerate help images that you have conflict with. main:83de6a9bf2b1afecd1f9ce4cd0493733 -build-docs:093cf82ab80aee65c6ada9b16ac60612 +build-docs:a9ee0b7c62d647cc7562df83076cbc2b ci:find-newer-dependencies:8fa2b57f5f0523c928743b235ee3ab5a ci:fix-ownership:fee2c9ec9ef19686792002ae054fecdd ci:free-space:47234aa0a60b0efd84972e6e797379f8 @@ -37,15 +37,15 @@ prod-image:verify:31bc5efada1d70a0a31990025db1a093 prod-image:79bd4cc9de03ab7e1d75f025d75eee46 release-management:create-minor-branch:6a01066dce15e09fb269a8385626657c release-management:generate-constraints:ae30d6ad49a1b2c15b61cb29080fd957 -release-management:generate-issue-content-providers:f3c00ba74e3afc054fe29b65156740ac +release-management:generate-issue-content-providers:649bd0f56af072754af3e5c2580d7039 release-management:prepare-airflow-package:3ac14ea6d2b09614959c0ec4fd564789 -release-management:prepare-provider-documentation:40d540fdfebf6c8ddc4cd151d52b88e6 -release-management:prepare-provider-packages:72dd7c3b19f85024bc9a4939cac5d87c +release-management:prepare-provider-documentation:d2aade2a4f4fb176a89e4db0c1226386 +release-management:prepare-provider-packages:7f5ca821bb9c5f80e1473b7b6584adcc release-management:release-prod-images:c9bc40938e0efad49e51ef66e83f9527 release-management:start-rc-process:6aafbaceabd7b67b9a1af4c2f59abc4c release-management:start-release:acb384d86e02ff5fde1bf971897be17c release-management:verify-provider-packages:88bd609aff6d09d52ab8d80d6e055e7b -release-management:926400d9c6d5c491f0182c5520bbfd68 +release-management:493c6163a3aa0ed6f7dbd4613afc7f3c setup:autocomplete:03343478bf1d0cf9c101d454cdb63b68 setup:check-all-params-in-groups:4d0f8c19cbdb56290055d863b08a3376 setup:config:3ffcd35dd24b486ddf1d08b797e3d017 @@ -58,7 +58,7 @@ start-airflow:5e8460ac38f8e9ea2a0ac7e248fd7bc9 static-checks:543f0c776d0f198e80a0f75058445bb2 stop:e5aa686b4e53707ced4039d8414d5cd6 testing:docker-compose-tests:b86c044b24138af0659a05ed6331576c -testing:helm-tests:94a442e7f3f63b34c4831a84d165690a +testing:helm-tests:936cf28fd84ce4ff5113795fdae9624b testing:integration-tests:225ddb6243cce5fc64f4824b87adfd98 testing:tests:86441445a2b521e8d5aee04d74978451 -testing:68efcf0731170e4ba2029121a5209e3a +testing:2d95034763ee699f2e2fc1804f2fd7f0 diff --git a/images/breeze/output-commands.svg b/images/breeze/output-commands.svg index 784715a8cce83..790b9fe55da49 100644 --- a/images/breeze/output-commands.svg +++ b/images/breeze/output-commands.svg @@ -35,8 +35,8 @@ .breeze-help-r1 { fill: #c5c8c6;font-weight: bold } .breeze-help-r2 { fill: #c5c8c6 } .breeze-help-r3 { fill: #d0b344;font-weight: bold } -.breeze-help-r4 { fill: #868887 } -.breeze-help-r5 { fill: #68a0b3;font-weight: bold } +.breeze-help-r4 
{ fill: #68a0b3;font-weight: bold } +.breeze-help-r5 { fill: #868887 } .breeze-help-r6 { fill: #98a84b;font-weight: bold } .breeze-help-r7 { fill: #8d7b39 } @@ -190,50 +190,50 @@ -Usage: breeze [OPTIONS] COMMAND [ARGS]... +Usage: breeze [OPTIONSCOMMAND [ARGS]... -╭─ Basic flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--python-pPython major/minor version used in Airflow image for images.(>3.7< | 3.8 | 3.9 | 3.10)│ -│[default: 3.7]                                              â”‚ -│--backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite]│ -│--postgres-version-PVersion of Postgres used.(>11< | 12 | 13 | 14 | 15)[default: 11]│ -│--mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7]│ -│--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest]│ -│--integrationIntegration(s) to enable when running (can be more than one).                            â”‚ -│(all | all-testable | cassandra | celery | kerberos | mongo | otel | pinot | statsd |    â”‚ -│statsd | trino)                                                                          â”‚ -│--forward-credentials-fForward local credentials to container when running.│ -│--db-reset-dReset DB when entering the container.│ -│--max-timeMaximum time that the command should take - if it takes longer, the command will fail.│ -│(INTEGER RANGE)                                                                       â”‚ -│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--verbose-vPrint verbose information about performed steps.│ -│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ -│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ -│--help-hShow this message and exit.│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Basic developer commands â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│start-airflow     Enter breeze environment and starts all Airflow components in the tmux session. Compile assets  â”‚ -│if contents of www directory changed.                                                           â”‚ -│static-checks     Run static checks.                                                                              â”‚ -│build-docs        Build documentation in the container.                                                           â”‚ -│stop              Stop running breeze environment.                                                                
â”‚ -│shell             Enter breeze environment. this is the default command use when no other is selected.            â”‚ -│exec              Joins the interactive shell of running airflow container.                                       â”‚ -│compile-www-assetsCompiles www assets.                                                                            â”‚ -│cleanup           Cleans the cache of parameters, docker cache and optionally built CI/PROD images.               â”‚ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Advanced command groups â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│testing                Tools that developers can use to run tests                                                 â”‚ -│ci-image               Tools that developers can use to manually manage CI images                                 â”‚ -│k8s                    Tools that developers use to run Kubernetes tests                                          â”‚ -│prod-image             Tools that developers can use to manually manage PROD images                               â”‚ -│setup                  Tools that developers can use to configure Breeze                                          â”‚ -│release-management     Tools that release managers can use to prepare and manage Airflow releases                 â”‚ -│ci                     Tools that CI workflows use to cleanup/manage CI environment                               â”‚ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Basic flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--python-pPython major/minor version used in Airflow image for images.(>3.7< | 3.8 | 3.9 | 3.10)│ +│[default: 3.7]                                              â”‚ +│--backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite]│ +│--postgres-version-PVersion of Postgres used.(>11< | 12 | 13 | 14 | 15)[default: 11]│ +│--mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7]│ +│--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest]│ +│--integrationIntegration(s) to enable when running (can be more than one).                            
â”‚ +│(all | all-testable | cassandra | celery | kerberos | mongo | otel | pinot | statsd |    â”‚ +│statsd | trino)                                                                          â”‚ +│--forward-credentials-fForward local credentials to container when running.│ +│--db-reset-dReset DB when entering the container.│ +│--max-timeMaximum time that the command should take - if it takes longer, the command will fail.│ +│(INTEGER RANGE)                                                                       â”‚ +│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--verbose-vPrint verbose information about performed steps.│ +│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ +│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ +│--help-hShow this message and exit.│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Basic developer commands â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│start-airflow     Enter breeze environment and starts all Airflow components in the tmux session. Compile assets  â”‚ +│if contents of www directory changed.                                                           â”‚ +│static-checks     Run static checks.                                                                              â”‚ +│build-docs        Build documentation in the container.                                                           â”‚ +│stop              Stop running breeze environment.                                                                â”‚ +│shell             Enter breeze environment. this is the default command use when no other is selected.            â”‚ +│exec              Joins the interactive shell of running airflow container.                                       â”‚ +│compile-www-assetsCompiles www assets.                                                                            â”‚ +│cleanup           Cleans the cache of parameters, docker cache and optionally built CI/PROD images.               
â”‚ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Advanced command groups â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│testing                Tools that developers can use to run tests                                                 â”‚ +│ci-image               Tools that developers can use to manually manage CI images                                 â”‚ +│k8s                    Tools that developers use to run Kubernetes tests                                          â”‚ +│prod-image             Tools that developers can use to manually manage PROD images                               â”‚ +│setup                  Tools that developers can use to configure Breeze                                          â”‚ +│release-management     Tools that release managers can use to prepare and manage Airflow releases                 â”‚ +│ci                     Tools that CI workflows use to cleanup/manage CI environment                               â”‚ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_build-docs.svg b/images/breeze/output_build-docs.svg index 0bcde083c2b86..6e2a229e5fd37 100644 --- a/images/breeze/output_build-docs.svg +++ b/images/breeze/output_build-docs.svg @@ -1,4 +1,4 @@ - + - + @@ -225,9 +225,12 @@ + + + - Command: build-docs + Command: build-docs @@ -238,66 +241,67 @@ -Usage: breeze build-docs [OPTIONS] +Usage: breeze build-docs [OPTIONS] Build documentation in the container. -╭─ Doc flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--docs-only-dOnly build documentation.│ -│--spellcheck-only-sOnly run spell checking.│ -│--clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx artifacts    â”‚ -│before the build - useful for a clean build.                                               â”‚ -│--for-productionBuilds documentation for official release i.e. all links point to stable version. Implies  â”‚ -│--clean-build│ -│--package-filterList of packages to consider.                                                              
â”‚ -│(apache-airflow | apache-airflow-providers-airbyte | apache-airflow-providers-alibaba |    â”‚ -│apache-airflow-providers-amazon | apache-airflow-providers-apache-beam |                   â”‚ -│apache-airflow-providers-apache-cassandra | apache-airflow-providers-apache-drill |        â”‚ -│apache-airflow-providers-apache-druid | apache-airflow-providers-apache-flink |            â”‚ -│apache-airflow-providers-apache-hdfs | apache-airflow-providers-apache-hive |              â”‚ -│apache-airflow-providers-apache-impala | apache-airflow-providers-apache-kylin |           â”‚ -│apache-airflow-providers-apache-livy | apache-airflow-providers-apache-pig |               â”‚ -│apache-airflow-providers-apache-pinot | apache-airflow-providers-apache-spark |            â”‚ -│apache-airflow-providers-apache-sqoop | apache-airflow-providers-arangodb |                â”‚ -│apache-airflow-providers-asana | apache-airflow-providers-atlassian-jira |                 â”‚ -│apache-airflow-providers-celery | apache-airflow-providers-cloudant |                      â”‚ -│apache-airflow-providers-cncf-kubernetes | apache-airflow-providers-common-sql |           â”‚ -│apache-airflow-providers-databricks | apache-airflow-providers-datadog |                   â”‚ -│apache-airflow-providers-dbt-cloud | apache-airflow-providers-dingding |                   â”‚ -│apache-airflow-providers-discord | apache-airflow-providers-docker |                       â”‚ -│apache-airflow-providers-elasticsearch | apache-airflow-providers-exasol |                 â”‚ -│apache-airflow-providers-facebook | apache-airflow-providers-ftp |                         â”‚ -│apache-airflow-providers-github | apache-airflow-providers-google |                        â”‚ -│apache-airflow-providers-grpc | apache-airflow-providers-hashicorp |                       â”‚ -│apache-airflow-providers-http | apache-airflow-providers-imap |                            â”‚ -│apache-airflow-providers-influxdb | apache-airflow-providers-jdbc |                        â”‚ -│apache-airflow-providers-jenkins | apache-airflow-providers-microsoft-azure |              â”‚ -│apache-airflow-providers-microsoft-mssql | apache-airflow-providers-microsoft-psrp |       â”‚ -│apache-airflow-providers-microsoft-winrm | apache-airflow-providers-mongo |                â”‚ -│apache-airflow-providers-mysql | apache-airflow-providers-neo4j |                          â”‚ -│apache-airflow-providers-odbc | apache-airflow-providers-openfaas |                        â”‚ -│apache-airflow-providers-opsgenie | apache-airflow-providers-oracle |                      â”‚ -│apache-airflow-providers-pagerduty | apache-airflow-providers-papermill |                  â”‚ -│apache-airflow-providers-plexus | apache-airflow-providers-postgres |                      â”‚ -│apache-airflow-providers-presto | apache-airflow-providers-qubole |                        â”‚ -│apache-airflow-providers-redis | apache-airflow-providers-salesforce |                     â”‚ -│apache-airflow-providers-samba | apache-airflow-providers-segment |                        â”‚ -│apache-airflow-providers-sendgrid | apache-airflow-providers-sftp |                        â”‚ -│apache-airflow-providers-singularity | apache-airflow-providers-slack |                    â”‚ -│apache-airflow-providers-smtp | apache-airflow-providers-snowflake |                       â”‚ -│apache-airflow-providers-sqlite | apache-airflow-providers-ssh |                           â”‚ -│apache-airflow-providers-tableau | 
apache-airflow-providers-tabular |                      â”‚ -│apache-airflow-providers-telegram | apache-airflow-providers-trino |                       â”‚ -│apache-airflow-providers-vertica | apache-airflow-providers-yandex |                       â”‚ -│apache-airflow-providers-zendesk | docker-stack | helm-chart)                              â”‚ -│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--one-pass-onlyBuilds documentation in one pass only. This is useful for debugging sphinx errors.│ -│--verbose-vPrint verbose information about performed steps.│ -│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ -│--help-hShow this message and exit.│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Doc flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--docs-only-dOnly build documentation.│ +│--spellcheck-only-sOnly run spell checking.│ +│--clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx artifacts    â”‚ +│before the build - useful for a clean build.                                               â”‚ +│--for-productionBuilds documentation for official release i.e. all links point to stable version. Implies  â”‚ +│--clean-build│ +│--one-pass-onlyBuilds documentation in one pass only. This is useful for debugging sphinx errors.│ +│--package-filterList of packages to consider.                                                              
â”‚ +│(apache-airflow | apache-airflow-providers-airbyte | apache-airflow-providers-alibaba |    â”‚ +│apache-airflow-providers-amazon | apache-airflow-providers-apache-beam |                   â”‚ +│apache-airflow-providers-apache-cassandra | apache-airflow-providers-apache-drill |        â”‚ +│apache-airflow-providers-apache-druid | apache-airflow-providers-apache-flink |            â”‚ +│apache-airflow-providers-apache-hdfs | apache-airflow-providers-apache-hive |              â”‚ +│apache-airflow-providers-apache-impala | apache-airflow-providers-apache-kylin |           â”‚ +│apache-airflow-providers-apache-livy | apache-airflow-providers-apache-pig |               â”‚ +│apache-airflow-providers-apache-pinot | apache-airflow-providers-apache-spark |            â”‚ +│apache-airflow-providers-apache-sqoop | apache-airflow-providers-arangodb |                â”‚ +│apache-airflow-providers-asana | apache-airflow-providers-atlassian-jira |                 â”‚ +│apache-airflow-providers-celery | apache-airflow-providers-cloudant |                      â”‚ +│apache-airflow-providers-cncf-kubernetes | apache-airflow-providers-common-sql |           â”‚ +│apache-airflow-providers-databricks | apache-airflow-providers-datadog |                   â”‚ +│apache-airflow-providers-dbt-cloud | apache-airflow-providers-dingding |                   â”‚ +│apache-airflow-providers-discord | apache-airflow-providers-docker |                       â”‚ +│apache-airflow-providers-elasticsearch | apache-airflow-providers-exasol |                 â”‚ +│apache-airflow-providers-facebook | apache-airflow-providers-ftp |                         â”‚ +│apache-airflow-providers-github | apache-airflow-providers-google |                        â”‚ +│apache-airflow-providers-grpc | apache-airflow-providers-hashicorp |                       â”‚ +│apache-airflow-providers-http | apache-airflow-providers-imap |                            â”‚ +│apache-airflow-providers-influxdb | apache-airflow-providers-jdbc |                        â”‚ +│apache-airflow-providers-jenkins | apache-airflow-providers-microsoft-azure |              â”‚ +│apache-airflow-providers-microsoft-mssql | apache-airflow-providers-microsoft-psrp |       â”‚ +│apache-airflow-providers-microsoft-winrm | apache-airflow-providers-mongo |                â”‚ +│apache-airflow-providers-mysql | apache-airflow-providers-neo4j |                          â”‚ +│apache-airflow-providers-odbc | apache-airflow-providers-openfaas |                        â”‚ +│apache-airflow-providers-openlineage | apache-airflow-providers-opsgenie |                 â”‚ +│apache-airflow-providers-oracle | apache-airflow-providers-pagerduty |                     â”‚ +│apache-airflow-providers-papermill | apache-airflow-providers-plexus |                     â”‚ +│apache-airflow-providers-postgres | apache-airflow-providers-presto |                      â”‚ +│apache-airflow-providers-qubole | apache-airflow-providers-redis |                         â”‚ +│apache-airflow-providers-salesforce | apache-airflow-providers-samba |                     â”‚ +│apache-airflow-providers-segment | apache-airflow-providers-sendgrid |                     â”‚ +│apache-airflow-providers-sftp | apache-airflow-providers-singularity |                     â”‚ +│apache-airflow-providers-slack | apache-airflow-providers-smtp |                           â”‚ +│apache-airflow-providers-snowflake | apache-airflow-providers-sqlite |                     â”‚ +│apache-airflow-providers-ssh | 
apache-airflow-providers-tableau |                          â”‚ +│apache-airflow-providers-tabular | apache-airflow-providers-telegram |                     â”‚ +│apache-airflow-providers-trino | apache-airflow-providers-vertica |                        â”‚ +│apache-airflow-providers-yandex | apache-airflow-providers-zendesk | docker-stack |        â”‚ +│helm-chart)                                                                                â”‚ +│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--verbose-vPrint verbose information about performed steps.│ +│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ +│--help-hShow this message and exit.│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_release-management.svg b/images/breeze/output_release-management.svg index 71f1cdbf178f6..c6d4cabe9eea7 100644 --- a/images/breeze/output_release-management.svg +++ b/images/breeze/output_release-management.svg @@ -35,8 +35,8 @@ .breeze-release-management-r1 { fill: #c5c8c6;font-weight: bold } .breeze-release-management-r2 { fill: #c5c8c6 } .breeze-release-management-r3 { fill: #d0b344;font-weight: bold } -.breeze-release-management-r4 { fill: #868887 } -.breeze-release-management-r5 { fill: #68a0b3;font-weight: bold } +.breeze-release-management-r4 { fill: #68a0b3;font-weight: bold } +.breeze-release-management-r5 { fill: #868887 } .breeze-release-management-r6 { fill: #98a84b;font-weight: bold } @@ -105,22 +105,22 @@ -Usage: breeze release-management [OPTIONS] COMMAND [ARGS]... +Usage: breeze release-management [OPTIONSCOMMAND [ARGS]... Tools that release managers can use to prepare and manage Airflow releases -╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--help-hShow this message and exit.│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Commands â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│generate-constraints              Generates pinned constraint files with all extras from setup.py in parallel.    â”‚ -│generate-issue-content-providers  Generates content for issue to test the release.                                â”‚ -│prepare-airflow-package           Prepare sdist/whl package of Airflow.                                           â”‚ -│prepare-provider-documentation    Prepare CHANGELOG, README and COMMITS information for providers.        
        â”‚ -│prepare-provider-packages         Prepare sdist/whl packages of Airflow Providers.                                â”‚ -│release-prod-images               Release production images to DockerHub (needs DockerHub permissions).           â”‚ -│verify-provider-packages          Verifies if all provider code is following expectations for providers.          â”‚ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--help-hShow this message and exit.│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│generate-constraints              Generates pinned constraint files with all extras from setup.py in parallel.    â”‚ +│generate-issue-content-providers  Generates content for issue to test the release.                                â”‚ +│prepare-airflow-package           Prepare sdist/whl package of Airflow.                                           â”‚ +│prepare-provider-documentation    Prepare CHANGELOGREADME and COMMITS information for providers.                â”‚ +│prepare-provider-packages         Prepare sdist/whl packages of Airflow Providers.                                â”‚ +│release-prod-images               Release production images to DockerHub (needs DockerHub permissions).           â”‚ +│verify-provider-packages          Verifies if all provider code is following expectations for providers.          
â”‚ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_release-management_generate-issue-content-providers.svg b/images/breeze/output_release-management_generate-issue-content-providers.svg index e7ceb94b0e0a0..7be16c5c5ec31 100644 --- a/images/breeze/output_release-management_generate-issue-content-providers.svg +++ b/images/breeze/output_release-management_generate-issue-content-providers.svg @@ -1,4 +1,4 @@ - + - + @@ -147,9 +147,12 @@ + + + - Command: release-management generate-issue-content-providers + Command: release-management generate-issue-content-providers @@ -160,7 +163,7 @@ -Usage: breeze release-management generate-issue-content-providers [OPTIONS] [airbyte | alibaba | amazon | apache.beam +Usage: breeze release-management generate-issue-content-providers [OPTIONS] [airbyte | alibaba | amazon | apache.beam                                                                   | apache.cassandra | apache.drill | apache.druid |                                                                   apache.flink | apache.hdfs | apache.hive |                                                                   apache.impala | apache.kylin | apache.livy | @@ -172,28 +175,29 @@                                                                   | ftp | github | google | grpc | hashicorp | http |                                                                   imap | influxdb | jdbc | jenkins | microsoft.azure |                                                                   microsoft.mssql | microsoft.psrp | microsoft.winrm | -                                                                  mongo | mysql | neo4j | odbc | openfaas | opsgenie | -                                                                  oracle | pagerduty | papermill | plexus | postgres | -                                                                  presto | qubole | redis | salesforce | samba | -                                                                  segment | sendgrid | sftp | singularity | slack | -                                                                  smtp | snowflake | sqlite | ssh | tableau | tabular -                                                                  | telegram | trino | vertica | yandex | zendesk]... - -Generates content for issue to test the release. - -╭─ Generate issue content flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--github-tokenGitHub token used to authenticate. You can set omit it if you have GITHUB_TOKEN env     â”‚ -│variable set. 
Can be generated with:                                                    â”‚ -│https://github.com/settings/tokens/new?description=Read%20sssues&scopes=repo:status     â”‚ -│(TEXT)                                                                                  â”‚ -│--suffixSuffix to add to the version prepared(TEXT)│ -│--only-available-in-distOnly consider package ids with packages prepared in the dist folder│ -│--excluded-pr-listComa-separated list of PRs to exclude from the issue.(TEXT)│ -│--disable-progressDisable progress bar│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--help-hShow this message and exit.│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +                                                                  mongo | mysql | neo4j | odbc | openfaas | +                                                                  openlineage | opsgenie | oracle | pagerduty | +                                                                  papermill | plexus | postgres | presto | qubole | +                                                                  redis | salesforce | samba | segment | sendgrid | +                                                                  sftp | singularity | slack | smtp | snowflake | +                                                                  sqlite | ssh | tableau | tabular | telegram | trino +                                                                  | vertica | yandex | zendesk]... + +Generates content for issue to test the release. + +╭─ Generate issue content flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--github-tokenGitHub token used to authenticate. You can set omit it if you have GITHUB_TOKEN env     â”‚ +│variable set. 
Can be generated with:                                                    â”‚ +│https://github.com/settings/tokens/new?description=Read%20sssues&scopes=repo:status     â”‚ +│(TEXT)                                                                                  â”‚ +│--suffixSuffix to add to the version prepared(TEXT)│ +│--only-available-in-distOnly consider package ids with packages prepared in the dist folder│ +│--excluded-pr-listComa-separated list of PRs to exclude from the issue.(TEXT)│ +│--disable-progressDisable progress bar│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--help-hShow this message and exit.│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_release-management_prepare-provider-documentation.svg b/images/breeze/output_release-management_prepare-provider-documentation.svg index c834b84177d88..3e00770bf125f 100644 --- a/images/breeze/output_release-management_prepare-provider-documentation.svg +++ b/images/breeze/output_release-management_prepare-provider-documentation.svg @@ -1,4 +1,4 @@ - + - + @@ -141,9 +141,12 @@ + + + - Command: release-management prepare-provider-documentation + Command: release-management prepare-provider-documentation @@ -154,7 +157,7 @@ -Usage: breeze release-management prepare-provider-documentation [OPTIONS] [airbyte | alibaba | amazon | apache.beam | +Usage: breeze release-management prepare-provider-documentation [OPTIONS] [airbyte | alibaba | amazon | apache.beam |                                                                 apache.cassandra | apache.drill | apache.druid |                                                                 apache.flink | apache.hdfs | apache.hive |                                                                 apache.impala | apache.kylin | apache.livy | @@ -166,26 +169,27 @@                                                                 github | google | grpc | hashicorp | http | imap |                                                                 influxdb | jdbc | jenkins | microsoft.azure |                                                                 microsoft.mssql | microsoft.psrp | microsoft.winrm | -                                                                mongo | mysql | neo4j | odbc | openfaas | opsgenie | -                                                                oracle | pagerduty | papermill | plexus | postgres | -                                                                presto | qubole | redis | salesforce | samba | segment -                                                                | sendgrid | sftp | singularity | slack | smtp | -                                                                snowflake | sqlite | ssh | tableau | tabular | -                                                                telegram | trino | vertica | yandex | zendesk]... - -Prepare CHANGELOG, README and COMMITS information for providers. 
- -╭─ Provider documentation preparation flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--debugDrop user in shell instead of running the command. Useful for debugging.│ -│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ -│--base-branch(TEXT)│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--verbose-vPrint verbose information about performed steps.│ -│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ -│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ -│--help-hShow this message and exit.│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +                                                                mongo | mysql | neo4j | odbc | openfaas | openlineage +                                                                | opsgenie | oracle | pagerduty | papermill | plexus | +                                                                postgres | presto | qubole | redis | salesforce | +                                                                samba | segment | sendgrid | sftp | singularity | +                                                                slack | smtp | snowflake | sqlite | ssh | tableau | +                                                                tabular | telegram | trino | vertica | yandex | +                                                                zendesk]... + +Prepare CHANGELOGREADME and COMMITS information for providers. + +╭─ Provider documentation preparation flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--debugDrop user in shell instead of running the command. 
Useful for debugging.│ +│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ +│--base-branch(TEXT)│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--verbose-vPrint verbose information about performed steps.│ +│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ +│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ +│--help-hShow this message and exit.│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_release-management_prepare-provider-packages.svg b/images/breeze/output_release-management_prepare-provider-packages.svg index 0dd411719e858..cdcd5a080e511 100644 --- a/images/breeze/output_release-management_prepare-provider-packages.svg +++ b/images/breeze/output_release-management_prepare-provider-packages.svg @@ -35,8 +35,8 @@ .breeze-release-management-prepare-provider-packages-r1 { fill: #c5c8c6;font-weight: bold } .breeze-release-management-prepare-provider-packages-r2 { fill: #c5c8c6 } .breeze-release-management-prepare-provider-packages-r3 { fill: #d0b344;font-weight: bold } -.breeze-release-management-prepare-provider-packages-r4 { fill: #868887 } -.breeze-release-management-prepare-provider-packages-r5 { fill: #68a0b3;font-weight: bold } +.breeze-release-management-prepare-provider-packages-r4 { fill: #68a0b3;font-weight: bold } +.breeze-release-management-prepare-provider-packages-r5 { fill: #868887 } .breeze-release-management-prepare-provider-packages-r6 { fill: #8d7b39 } .breeze-release-management-prepare-provider-packages-r7 { fill: #98a84b;font-weight: bold } @@ -154,7 +154,7 @@ -Usage: breeze release-management prepare-provider-packages [OPTIONS] [airbyte | alibaba | amazon | apache.beam | +Usage: breeze release-management prepare-provider-packages [OPTIONS] [airbyte | alibaba | amazon | apache.beam |                                                            apache.cassandra | apache.drill | apache.druid |                                                            apache.flink | apache.hdfs | apache.hive | apache.impala |                                                            apache.kylin | apache.livy | apache.pig | apache.pinot | @@ -165,27 +165,27 @@                                                            | github | google | grpc | hashicorp | http | imap |                                                            influxdb | jdbc | jenkins | microsoft.azure |                                                            microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo -                                                           | mysql | neo4j | odbc | openfaas | opsgenie | oracle | -                                                           pagerduty | papermill | plexus | postgres | presto | qubole -                                                           | redis | salesforce | samba | segment | sendgrid | sftp | -                                                           singularity | slack | smtp | snowflake | sqlite | ssh | -                                            
               tableau | tabular | telegram | trino | vertica | yandex | -                                                           zendesk]... +                                                           | mysql | neo4j | odbc | openfaas | openlineage | opsgenie +                                                           | oracle | pagerduty | papermill | plexus | postgres | +                                                           presto | qubole | redis | salesforce | samba | segment | +                                                           sendgrid | sftp | singularity | slack | smtp | snowflake | +                                                           sqlite | ssh | tableau | tabular | telegram | trino | +                                                           vertica | yandex | zendesk]... Prepare sdist/whl packages of Airflow Providers. -╭─ Package flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--package-formatFormat of packages.(wheel | sdist | both)[default: wheel]│ -│--version-suffix-for-pypiVersion suffix used for PyPI packages (alpha, beta, rc1, etc.).(TEXT)│ -│--package-list-fileRead list of packages from text file (one package per line).(FILENAME)│ -│--debugDrop user in shell instead of running the command. Useful for debugging.│ -│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--verbose-vPrint verbose information about performed steps.│ -│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ -│--help-hShow this message and exit.│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Package flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--package-formatFormat of packages.(wheel | sdist | both)[default: wheel]│ +│--version-suffix-for-pypiVersion suffix used for PyPI packages (alpha, beta, rc1, etc.).(TEXT)│ +│--package-list-fileRead list of packages from text file (one package per line).(FILENAME)│ +│--debugDrop user in shell instead of running the command. 
Useful for debugging.│ +│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│--verbose-vPrint verbose information about performed steps.│ +│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ +│--help-hShow this message and exit.│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_static-checks.svg b/images/breeze/output_static-checks.svg index 3eeae4fa8cc04..04b6730555456 100644 --- a/images/breeze/output_static-checks.svg +++ b/images/breeze/output_static-checks.svg @@ -35,8 +35,8 @@ .breeze-static-checks-r1 { fill: #c5c8c6;font-weight: bold } .breeze-static-checks-r2 { fill: #c5c8c6 } .breeze-static-checks-r3 { fill: #d0b344;font-weight: bold } -.breeze-static-checks-r4 { fill: #868887 } -.breeze-static-checks-r5 { fill: #68a0b3;font-weight: bold } +.breeze-static-checks-r4 { fill: #68a0b3;font-weight: bold } +.breeze-static-checks-r5 { fill: #868887 } .breeze-static-checks-r6 { fill: #98a84b;font-weight: bold } .breeze-static-checks-r7 { fill: #8d7b39 } @@ -223,61 +223,61 @@ -Usage: breeze static-checks [OPTIONS] [PRECOMMIT_ARGS]... +Usage: breeze static-checks [OPTIONS] [PRECOMMIT_ARGS]... Run static checks. -╭─ Pre-commit flags ───────────────────────────────────────────────────────────────────────────────────────────────────╮ -│--type-tType(s) of the static checks to run.                                                    
│ -│(all | black | blacken-docs | check-airflow-config-yaml-consistent |                    │ -│check-airflow-provider-compatibility | check-apache-license-rat |                       │ -│check-base-operator-partial-arguments | check-base-operator-usage |                     │ -│check-boring-cyborg-configuration | check-breeze-top-dependencies-limited |             │ -│check-builtin-literals | check-changelog-has-no-duplicates |                            │ -│check-core-deprecation-classes | check-daysago-import-from-utils |                      │ -│check-decorated-operator-implements-custom-name | check-docstring-param-types |         │ -│check-example-dags-urls | check-executables-have-shebangs |                             │ -│check-extra-packages-references | check-extras-order | check-for-inclusive-language |   │ -│check-hooks-apply | check-incorrect-use-of-LoggingMixin | check-init-decorator-arguments│ -│| check-lazy-logging | check-links-to-example-dags-do-not-use-hardcoded-versions |      │ -│check-merge-conflict | check-newsfragments-are-valid |                                  │ -│check-no-providers-in-core-examples | check-no-relative-imports |                       │ -│check-only-new-session-with-provide-session |                                           │ -│check-persist-credentials-disabled-in-github-workflows |                                │ -│check-pre-commit-information-consistent | check-provide-create-sessions-imports |       │ -│check-provider-yaml-valid | check-providers-init-file-missing |                         │ -│check-providers-subpackages-init-file-exist | check-pydevd-left-in-code |               │ -│check-revision-heads-map | check-safe-filter-usage-in-html | check-setup-order |        │ -│check-start-date-not-used-in-defaults | check-system-tests-present |                    │ -│check-system-tests-tocs | check-urlparse-usage-in-code | check-xml | codespell |        │ -│compile-www-assets | compile-www-assets-dev | create-missing-init-py-files-tests |      │ -│debug-statements | detect-private-key | doctoc | end-of-file-fixer | fix-encoding-pragma│ -│| flynt | identity | insert-license | lint-chart-schema | lint-css | lint-dockerfile |  │ -│lint-helm-chart | lint-json-schema | lint-markdown | lint-openapi | mixed-line-ending | │ -│mypy-core | mypy-dev | mypy-docs | mypy-providers | pretty-format-json |                │ -│python-no-log-warn | replace-bad-characters | rst-backticks | ruff | shellcheck |       │ -│trailing-whitespace | ts-compile-format-lint-www | update-black-version |               │ -│update-breeze-cmd-output | update-breeze-readme-config-hash |                           │ -│update-common-sql-api-stubs | update-er-diagram | update-extras |                       │ -│update-in-the-wild-to-be-sorted | update-inlined-dockerfile-scripts |                   │ -│update-installed-providers-to-be-sorted | update-local-yml-file |                       │ -│update-migration-references | update-providers-dependencies |                           │ -│update-spelling-wordlist-to-be-sorted | update-supported-versions |                     │ -│update-vendored-in-k8s-json-schema | update-version | yamllint)                         │ -│--file-fList of files to run the checks on.(PATH)│ -│--all-files-aRun checks on all files.│ -│--show-diff-on-failure-sShow diff for files modified by the checks.│ -│--last-commit-cRun checks for all files in last commit. 
Mutually exclusive with --commit-ref.│ -│--commit-ref-rRun checks for this commit reference only (can be any git commit-ish reference).        │ -│Mutually exclusive with --last-commit.                                                  │ -│(TEXT)                                                                                  │ -│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│--verbose-vPrint verbose information about performed steps.│ -│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ -│--help-hShow this message and exit.│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Pre-commit flags ───────────────────────────────────────────────────────────────────────────────────────────────────╮ +│--type-tType(s) of the static checks to run.                                                    │ +│(all | black | blacken-docs | check-airflow-config-yaml-consistent |                    │ +│check-airflow-provider-compatibility | check-apache-license-rat |                       │ +│check-base-operator-partial-arguments | check-base-operator-usage |                     │ +│check-boring-cyborg-configuration | check-breeze-top-dependencies-limited |             │ +│check-builtin-literals | check-changelog-has-no-duplicates |                            │ +│check-core-deprecation-classes | check-daysago-import-from-utils |                      │ +│check-decorated-operator-implements-custom-name | check-docstring-param-types |         │ +│check-example-dags-urls | check-executables-have-shebangs |                             │ +│check-extra-packages-references | check-extras-order | check-for-inclusive-language |   │ +│check-hooks-apply | check-incorrect-use-of-LoggingMixin | check-init-decorator-arguments│ +│| check-lazy-logging | check-links-to-example-dags-do-not-use-hardcoded-versions |      │ +│check-merge-conflict | check-newsfragments-are-valid |                                  │ +│check-no-providers-in-core-examples | check-no-relative-imports |                       │ +│check-only-new-session-with-provide-session |                                           │ +│check-persist-credentials-disabled-in-github-workflows |                                │ +│check-pre-commit-information-consistent | check-provide-create-sessions-imports |       │ +│check-provider-yaml-valid | check-providers-init-file-missing |                         │ +│check-providers-subpackages-init-file-exist | check-pydevd-left-in-code |               │ +│check-revision-heads-map | check-safe-filter-usage-in-html | check-setup-order |        │ +│check-start-date-not-used-in-defaults | check-system-tests-present |                    │ +│check-system-tests-tocs | check-urlparse-usage-in-code | 
check-xml | codespell |        │ +│compile-www-assets | compile-www-assets-dev | create-missing-init-py-files-tests |      │ +│debug-statements | detect-private-key | doctoc | end-of-file-fixer | fix-encoding-pragma│ +│| flynt | identity | insert-license | lint-chart-schema | lint-css | lint-dockerfile |  │ +│lint-helm-chart | lint-json-schema | lint-markdown | lint-openapi | mixed-line-ending | │ +│mypy-core | mypy-dev | mypy-docs | mypy-providers | pretty-format-json |                │ +│python-no-log-warn | replace-bad-characters | rst-backticks | ruff | shellcheck |       │ +│trailing-whitespace | ts-compile-format-lint-www | update-black-version |               │ +│update-breeze-cmd-output | update-breeze-readme-config-hash |                           │ +│update-common-sql-api-stubs | update-er-diagram | update-extras |                       │ +│update-in-the-wild-to-be-sorted | update-inlined-dockerfile-scripts |                   │ +│update-installed-providers-to-be-sorted | update-local-yml-file |                       │ +│update-migration-references | update-providers-dependencies |                           │ +│update-spelling-wordlist-to-be-sorted | update-supported-versions |                     │ +│update-vendored-in-k8s-json-schema | update-version | yamllint)                         │ +│--file-fList of files to run the checks on.(PATH)│ +│--all-files-aRun checks on all files.│ +│--show-diff-on-failure-sShow diff for files modified by the checks.│ +│--last-commit-cRun checks for all files in last commit. Mutually exclusive with --commit-ref.│ +│--commit-ref-rRun checks for this commit reference only (can be any git commit-ish reference).        │ +│Mutually exclusive with --last-commit.                                                  │ +│(TEXT)                                                                                  │ +│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│--verbose-vPrint verbose information about performed steps.│ +│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ +│--help-hShow this message and exit.│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_testing_helm-tests.svg b/images/breeze/output_testing_helm-tests.svg index 9983efe5eb0cb..34a6c6de0158b 100644 --- a/images/breeze/output_testing_helm-tests.svg +++ b/images/breeze/output_testing_helm-tests.svg @@ -1,4 +1,4 @@ - +