From 126b507e825df6302f31024cd9b961b50d18a165 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Fri, 18 Aug 2023 15:28:15 +0200 Subject: [PATCH 001/117] Remove openlineage exclusion (#33491) Now that Airflow 2.7.0 is released, we can remove exclusion that we had for openlineage which prevented from using it as dependency of Airflow in CI. (cherry picked from commit 008f2335f8eb40624e164a2cc608148bd356d17e) --- Dockerfile | 2 +- .../src/airflow_breeze/global_constants.py | 1 + docs/docker-stack/build-arg-ref.rst | 1 + images/breeze/output-commands-hash.txt | 4 +- images/breeze/output-commands.svg | 108 +++++----- images/breeze/output_prod-image.svg | 24 +-- images/breeze/output_prod-image_build.svg | 186 +++++++++--------- scripts/ci/installed_providers.txt | 1 + scripts/in_container/_in_container_utils.sh | 7 - .../in_container/run_generate_constraints.sh | 2 +- 10 files changed, 166 insertions(+), 170 deletions(-) diff --git a/Dockerfile b/Dockerfile index b71a7a0f1949..5e09803b7ddf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,7 +35,7 @@ # much smaller. # # Use the same builder frontend version for everyone -ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf.kubernetes,daskexecutor,docker,elasticsearch,ftp,google,google_auth,grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,virtualenv" +ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf.kubernetes,daskexecutor,docker,elasticsearch,ftp,google,google_auth,grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,openlineage,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,virtualenv" ARG ADDITIONAL_AIRFLOW_EXTRAS="" ARG ADDITIONAL_PYTHON_DEPS="" diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index 8af1cc5604fc..c9a9066f0721 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -358,6 +358,7 @@ def get_airflow_extras(): "microsoft.azure", "mysql", "odbc", + "openlineage", "pandas", "postgres", "redis", diff --git a/docs/docker-stack/build-arg-ref.rst b/docs/docker-stack/build-arg-ref.rst index a142968b5b4f..9b2cd6f20979 100644 --- a/docs/docker-stack/build-arg-ref.rst +++ b/docs/docker-stack/build-arg-ref.rst @@ -98,6 +98,7 @@ List of default extras in the production Dockerfile: * microsoft.azure * mysql * odbc +* openlineage * pandas * postgres * redis diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt index eab332d8aa67..ece518a8740d 100644 --- a/images/breeze/output-commands-hash.txt +++ b/images/breeze/output-commands-hash.txt @@ -32,10 +32,10 @@ k8s:status:1529ccd444b41c4b0b5f943289957100 k8s:tests:2a1e2928faea2eddafaff94176a46690 k8s:upload-k8s-image:6b3a20cdeb692f3c3d727f6b9e68c901 k8s:8c1e4287deb0533a74f3b302f9c574be -prod-image:build:d577e6666008b1b1d5097563fcccc9fa +prod-image:build:7b971535fd4a1b93bebacd58b52b073a prod-image:pull:76f1f27e6119928412abecf153fce4bb prod-image:verify:bd2b78738a7c388dbad6076c41a9f906 -prod-image:117674c83c188c7afca068575fa5314d +prod-image:6877cb974df8918504234536f6a35886 release-management:add-back-references:0d4eb5ed82e5381bc630b343ba605a72 release-management:create-minor-branch:a3834afc4aa5d1e98002c9e9e7a9931d release-management:generate-constraints:b8fcaf8f0acd35ed5dbd48659bdb6485 diff --git a/images/breeze/output-commands.svg b/images/breeze/output-commands.svg index 65ed6cdbb0c2..38d9a6c3665e 100644 --- 
a/images/breeze/output-commands.svg
+++ b/images/breeze/output-commands.svg
[re-rendered SVG text of the `breeze` help screenshot: "Usage: breeze [OPTIONS] COMMAND [ARGS]..." with the Basic flags, Common options, Developer, Testing, Image, Release management and Other commands panels; only colour classes and rendered help text changed]
diff --git a/images/breeze/output_prod-image.svg b/images/breeze/output_prod-image.svg
index 35bf4ae86bb6..cd179a225efe 100644
--- a/images/breeze/output_prod-image.svg
+++ b/images/breeze/output_prod-image.svg
[re-rendered SVG text of the `breeze prod-image` help screenshot: Common options and Production Image tools (build, pull, verify) panels]
diff --git a/images/breeze/output_prod-image_build.svg b/images/breeze/output_prod-image_build.svg
index 3b3973b94572..eefdc8f4ccc0 100644
--- a/images/breeze/output_prod-image_build.svg
+++ b/images/breeze/output_prod-image_build.svg
[re-rendered SVG text of the `breeze prod-image build` help screenshot: Basic usage, parallel build, image customization, cache/push and GitHub authentication option panels, including the default --airflow-extras value]
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/scripts/ci/installed_providers.txt b/scripts/ci/installed_providers.txt index 9cdcf765eb06..bd32056d5f32 100644 --- a/scripts/ci/installed_providers.txt +++ b/scripts/ci/installed_providers.txt @@ -14,6 +14,7 @@ imap microsoft.azure mysql odbc +openlineage postgres redis sendgrid diff --git a/scripts/in_container/_in_container_utils.sh b/scripts/in_container/_in_container_utils.sh index c6a859e40461..16663f47389c 100644 --- a/scripts/in_container/_in_container_utils.sh +++ b/scripts/in_container/_in_container_utils.sh @@ -298,13 +298,6 @@ function install_all_providers_from_pypi_with_eager_upgrade() { for provider_package in ${ALL_PROVIDERS_PACKAGES} do echo -n "Checking if ${provider_package} is available in PyPI: " - if [[ ${provider_package} == "apache-airflow-providers-openlineage" ]]; then - # The openlineage provider has 2.7.0 airflow dependency so it should be excluded for now in - # "pypi" dependency calculation - # We should remove it right after 2.7.0 is released to PyPI and regenerate the 2.7.0 constraints - echo "${COLOR_YELLOW}Skipped until 2.7.0 is released${COLOR_RESET}" - continue - fi res=$(curl --head -s -o /dev/null -w "%{http_code}" "https://pypi.org/project/${provider_package}/") if [[ ${res} == "200" ]]; then packages_to_install+=( "${provider_package}" ) diff --git a/scripts/in_container/run_generate_constraints.sh b/scripts/in_container/run_generate_constraints.sh index 717db0133942..d3b4ad20e09c 100755 --- a/scripts/in_container/run_generate_constraints.sh +++ b/scripts/in_container/run_generate_constraints.sh @@ -93,7 +93,7 @@ elif [[ ${AIRFLOW_CONSTRAINTS_MODE} == "constraints" ]]; then # # 1. Reproducible installation of airflow with selected providers (note constraints are used): # -# pip install "apache-airflow[celery,cncf.kubernetes,google,amazon,snowflake]==X.Y.Z" \ +# pip install "apache-airflow[celery,cncf.kubernetes,google,amazon,snowflake]==X.Y.Z" \\ # --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-X.Y.Z/constraints-3.8.txt" # # 2. 
Installing own dependencies that are potentially not matching the constraints (note constraints are not From dc44806dc33b34a1e08602a94650a05093d44e86 Mon Sep 17 00:00:00 2001 From: ldacey Date: Sat, 19 Aug 2023 17:58:33 +0800 Subject: [PATCH 002/117] Fix OpenLineage link in New Features notes (#33513) The label is provider:openlineage instead of API-53 (cherry picked from commit de17b939eb37d0bb9e75ec48be2a0e7159e4cd1c) --- RELEASE_NOTES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 90f9d155cfa1..1b316aa009f4 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -142,7 +142,7 @@ New Features - Trigger Button - Implement Part 2 of AIP-50 (#31583) - Removing Executor Coupling from Core Airflow (`AIP-51 `_) - Automatic setup and teardown tasks (`AIP-52 `_) -- OpenLineage in Airflow (`AIP-53 `_) +- OpenLineage in Airflow (`AIP-53 `_) - Experimental: Add a cache to Variable and Connection when called at dag parsing time (#30259) - Enable pools to consider deferred tasks (#32709) - Allows to choose SSL context for SMTP connection (#33070) From a335ca5a58f75d0e962d1ef448fc845f14f7b7dd Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Wed, 23 Aug 2023 08:18:14 +0100 Subject: [PATCH 003/117] Update the release date for 2.7.0 --- RELEASE_NOTES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 1b316aa009f4..c3986432cfc2 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -21,7 +21,7 @@ .. towncrier release notes start -Airflow 2.7.0 (2023-08-14) +Airflow 2.7.0 (2023-08-18) -------------------------- Significant Changes From 100f37d9a4cd18626effe491fab56957252e7b1d Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Fri, 18 Aug 2023 23:10:17 +0200 Subject: [PATCH 004/117] Remove Pydantic limitation for version < 2 (#33507) We already fixed all deprecation warnings for Pydantic 2 and we can thus remove Pydantic 2 limitation. Even if we are waiting for other dependencies (aws-sam-translator) it should be save to remove the limit - we will get Pydantic 2 when aws-sam-translate new version is released in a week or two (Pydantic 2 support has been added last week in https://github.com/aws/serverless-application-model/pull/3282) (cherry picked from commit 754a4ab396e6e6002f79a716f93ed71712393687) --- setup.cfg | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/setup.cfg b/setup.cfg index 16e9930a3866..399ff79bd965 100644 --- a/setup.cfg +++ b/setup.cfg @@ -126,10 +126,7 @@ install_requires = pendulum>=2.0 pluggy>=1.0 psutil>=4.2.0 - # We limit Pydantic to <2.0.0 until we can upgrade - there are limitation for Pydantic in AWS provider - # dependency (aws-sam-translator) - also we need to change orm-mode to from_attributes in definitions - # of the ORM models. See for the previous attempt https://github.com/apache/airflow/pull/33220 - pydantic>=1.10.0,<2.0.0 + pydantic>=1.10.0 pygments>=2.0.1 pyjwt>=2.0.0 python-daemon>=3.0.0 From 698ad804d161f67e512ace862eccffadcd182f00 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Mon, 21 Aug 2023 15:15:55 +0200 Subject: [PATCH 005/117] Add MySQL 8.1 to supported versions. (#33576) * Add MySQL 8.1 to supported versions. Anticipating Lazy Consensus to be reached we add 8.1 version of MySQL to supported versions. 
* Apply suggestions from code review (cherry picked from commit 825f65f67e0732c84a1978c342a4f77d152636b9) --- README.md | 18 +++++++++--------- .../installation/prerequisites.rst | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4f6ec979939e..41b70e132a17 100644 --- a/README.md +++ b/README.md @@ -86,15 +86,15 @@ Airflow is not a streaming solution, but it is often used to process real-time d Apache Airflow is tested with: -| | Main version (dev) | Stable version (2.7.0) | -|-------------|------------------------|---------------------------| -| Python | 3.8, 3.9, 3.10, 3.11 | 3.8, 3.9, 3.10, 3.11 | -| Platform | AMD64/ARM64(\*) | AMD64/ARM64(\*) | -| Kubernetes | 1.24, 1.25, 1.26, 1.27 | 1.24, 1.25, 1.26, 1.27 | -| PostgreSQL | 11, 12, 13, 14, 15 | 11, 12, 13, 14, 15 | -| MySQL | 5.7, 8 | 5.7, 8 | -| SQLite | 3.15.0+ | 3.15.0+ | -| MSSQL | 2017(\*), 2019(\*) | 2017(\*), 2019(\*) | +| | Main version (dev) | Stable version (2.7.0) | +|-------------|------------------------|------------------------| +| Python | 3.8, 3.9, 3.10, 3.11 | 3.8, 3.9, 3.10, 3.11 | +| Platform | AMD64/ARM64(\*) | AMD64/ARM64(\*) | +| Kubernetes | 1.24, 1.25, 1.26, 1.27 | 1.24, 1.25, 1.26, 1.27 | +| PostgreSQL | 11, 12, 13, 14, 15 | 11, 12, 13, 14, 15 | +| MySQL | 5.7, 8.0, 8.1 | 5.7, 8.0, 8.1 | +| SQLite | 3.15.0+ | 3.15.0+ | +| MSSQL | 2017(\*), 2019(\*) | 2017(\*), 2019(\*) | \* Experimental diff --git a/docs/apache-airflow/installation/prerequisites.rst b/docs/apache-airflow/installation/prerequisites.rst index 7f8ddec84254..1252431bb385 100644 --- a/docs/apache-airflow/installation/prerequisites.rst +++ b/docs/apache-airflow/installation/prerequisites.rst @@ -25,7 +25,7 @@ Airflow™ is tested with: * Databases: * PostgreSQL: 11, 12, 13, 14, 15 - * MySQL: 5.7, 8 + * MySQL: 5.7, 8.0, 8.1 * SQLite: 3.15.0+ * MSSQL(Experimental): 2017, 2019 From 3fad890393ec0adf388bd7df059d88ce965e7472 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Mon, 21 Aug 2023 17:49:17 +0200 Subject: [PATCH 006/117] Fix dependencies for celery and opentelemetry for Python 3.8 (#33579) We used to have problems with `pip` backtracking when we relaxed too much open-telemetry dependencies. It turned out that the backtracting was only happening on Python 3.8 and that it was ultimately caused by conflict between importlib_metadata between Airflow and newer versions of opentelemetry (we had <5 for Python 3.8, they had >6 for all versions. The reason for limiting it in Airflow was Celery that was not working well with importlib 5. Since Celery 5.3 solved the problems (released 6th of June) we can now relax the importlib_metadata limit and set Celery to version >= 5.3.0) which nicely resolves the conflict and there is no more backtracking when trying to install newer versions of opentelemetry for Python 3.8. 
Fixes: #33577 (cherry picked from commit ae25a52ae342c9e0bc3afdb21d613447c3687f6c) --- airflow/providers/celery/provider.yaml | 2 +- dev/breeze/src/airflow_breeze/utils/path_utils.py | 2 +- generated/provider_dependencies.json | 2 +- setup.cfg | 8 ++------ setup.py | 2 +- 5 files changed, 6 insertions(+), 10 deletions(-) diff --git a/airflow/providers/celery/provider.yaml b/airflow/providers/celery/provider.yaml index 0eccce68904f..cecb52f93341 100644 --- a/airflow/providers/celery/provider.yaml +++ b/airflow/providers/celery/provider.yaml @@ -43,7 +43,7 @@ dependencies: # Uses Celery for CeleryExecutor, and we also know that Kubernetes Python client follows SemVer # (https://docs.celeryq.dev/en/stable/contributing.html?highlight=semver#versions). # Make sure that the limit here is synchronized with [celery] extra in the airflow core - - celery>=5.2.3,<6 + - celery>=5.3.0,<6 - flower>=1.0.0 integrations: diff --git a/dev/breeze/src/airflow_breeze/utils/path_utils.py b/dev/breeze/src/airflow_breeze/utils/path_utils.py index 2314f3aa9f47..c9627c5ffb27 100644 --- a/dev/breeze/src/airflow_breeze/utils/path_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/path_utils.py @@ -87,7 +87,7 @@ def get_package_setup_metadata_hash() -> str: try: from importlib.metadata import distribution # type: ignore[attr-defined] except ImportError: - from importlib_metadata import distribution # type: ignore[no-redef] + from importlib_metadata import distribution # type: ignore[no-redef, assignment] prefix = "Package config hash: " diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index 42146c27678a..b029917e81e4 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -244,7 +244,7 @@ "celery": { "deps": [ "apache-airflow>=2.4.0", - "celery>=5.2.3,<6", + "celery>=5.3.0,<6", "flower>=1.0.0" ], "cross-providers-deps": [ diff --git a/setup.cfg b/setup.cfg index 399ff79bd965..c2ff2567f853 100644 --- a/setup.cfg +++ b/setup.cfg @@ -102,10 +102,7 @@ install_requires = graphviz>=0.12 gunicorn>=20.1.0 httpx - # Importlib-metadata 5 is breaking Celery import due to regression it introduced - # This was tracked and fixed in https://github.com/celery/celery/pull/7785 but it is not released yet - # We can remove the < 5.0.0 limitation when Celery 5.3.0 gets released and we bump celery to >= 5.3.0 - importlib_metadata>=1.7,<5.0.0;python_version<"3.9" + importlib_metadata>=1.7;python_version<"3.9" importlib_resources>=5.2;python_version<"3.9" itsdangerous>=2.0 jinja2>=3.0.0 @@ -118,8 +115,7 @@ install_requires = markupsafe>=1.1.1 marshmallow-oneofschema>=2.0.1 mdit-py-plugins>=0.3.0 - # Pip can not find a version that satisfies constraints if opentelemetry-api is not pinned. - opentelemetry-api==1.15.0 + opentelemetry-api>=1.15.0 opentelemetry-exporter-otlp packaging>=14.0 pathspec>=0.9.0 diff --git a/setup.py b/setup.py index 6f29dc1f4705..00f4f7f947af 100644 --- a/setup.py +++ b/setup.py @@ -275,7 +275,7 @@ def write_version(filename: str = str(AIRFLOW_SOURCES_ROOT / "airflow" / "git_ve # limiting minimum airflow version supported in celery provider due to the # potential breaking changes in Airflow Core as well (celery is added as extra, so Airflow # core is not hard-limited via install-requires, only by extra). 
- "celery>=5.2.3,<6" + "celery>=5.3.0,<6" ] cgroups = [ # Cgroupspy 0.2.2 added Python 3.10 compatibility From 6005da9ba5b1bb7ce004a69a6d8be75d43e12a53 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Tue, 22 Aug 2023 13:27:03 +0200 Subject: [PATCH 007/117] Improve detection of when breeze CI image needs rebuilding (#33603) * Improve detection of when breeze CI image needs rebuilding Previously we have been using provider.yaml file modification as a sign that the docker image needs rebuilding when starting image. However just modification of provider.yaml file is not a sign that the image needs rebuilding. The image needs rebuilding when provider dependencies changed, but there are many more reasons why provider.yaml file changed - especially recently provider.yaml file contains much more information and dependencies are only part of it. Provider.yaml files can also be modified by release manager wnen documentation is prepared, but none of the documentation change is a reason for rebuilding the image. This PR optimize the check for image building introducing two step process: * first we check if provider.yaml files changed * if they did, we regenerate provider dependencies by manully running the pre-commit script * then provider_dependencies.json is used instead of all providers to determine if the image needs rebuilding This has several nice side effects: * the list of files that have been modified displayed to the user is potentially much smaller (no provider.yaml files) * provider_dependencies.json is regenereated automatically when you run any breeze command, which means that you do not have to have pre-commit installed to regenerate it * the notification "image needs rebuilding" will be printed less frequently to the user - only when it is really needed * preparing provider documentation in CI will not trigger image rebuilding (which might occasionally fail in such case especially when we bring back a provider from long suspension like it happened in #33574 * Update dev/breeze/src/airflow_breeze/commands/developer_commands.py (cherry picked from commit ac0d5b3dbe731605af38018ce7ce970ffded539a) --- .../commands/ci_image_commands.py | 6 +- .../commands/developer_commands.py | 3 + .../src/airflow_breeze/global_constants.py | 2 +- .../airflow_breeze/params/build_ci_params.py | 1 + .../src/airflow_breeze/params/shell_params.py | 1 + .../airflow_breeze/utils/md5_build_check.py | 72 +++++++++++++++---- ...re_commit_update_providers_dependencies.py | 34 ++++++--- 7 files changed, 93 insertions(+), 26 deletions(-) diff --git a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py index 124d6ca31809..2eecbfff3ca8 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py @@ -455,7 +455,10 @@ def should_we_run_the_build(build_ci_params: BuildCiParams) -> bool: # We import those locally so that click autocomplete works from inputimeout import TimeoutOccurred - if not md5sum_check_if_build_is_needed(md5sum_cache_dir=build_ci_params.md5sum_cache_dir): + if not md5sum_check_if_build_is_needed( + md5sum_cache_dir=build_ci_params.md5sum_cache_dir, + skip_provider_dependencies_check=build_ci_params.skip_provider_dependencies_check, + ): return False try: answer = user_confirm( @@ -631,6 +634,7 @@ def rebuild_or_pull_ci_image_if_needed(command_params: ShellParams | BuildCiPara image_tag=command_params.image_tag, platform=command_params.platform, 
force_build=command_params.force_build, + skip_provider_dependencies_check=command_params.skip_provider_dependencies_check, ) if command_params.image_tag is not None and command_params.image_tag != "latest": return_code, message = run_pull_image( diff --git a/dev/breeze/src/airflow_breeze/commands/developer_commands.py b/dev/breeze/src/airflow_breeze/commands/developer_commands.py index f29584bb3266..4a694cac2e71 100644 --- a/dev/breeze/src/airflow_breeze/commands/developer_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/developer_commands.py @@ -516,6 +516,9 @@ def static_checks( force_build=force_build, image_tag=image_tag, github_repository=github_repository, + # for static checks we do not want to regenerate dependencies before pre-commits are run + # we want the pre-commit to do it for us (and detect the case the dependencies are updated) + skip_provider_dependencies_check=True, ) if not skip_image_check: rebuild_or_pull_ci_image_if_needed(command_params=build_params) diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index c9a9066f0721..d79a6561d2c4 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -305,13 +305,13 @@ def get_airflow_extras(): "setup.cfg", "Dockerfile.ci", ".dockerignore", + "generated/provider_dependencies.json", "scripts/docker/common.sh", "scripts/docker/install_additional_dependencies.sh", "scripts/docker/install_airflow.sh", "scripts/docker/install_airflow_dependencies_from_branch_tip.sh", "scripts/docker/install_from_docker_context_files.sh", "scripts/docker/install_mysql.sh", - *ALL_PROVIDER_YAML_FILES, ] ENABLED_SYSTEMS = "" diff --git a/dev/breeze/src/airflow_breeze/params/build_ci_params.py b/dev/breeze/src/airflow_breeze/params/build_ci_params.py index 8888a7398f36..4ad0b82789b8 100644 --- a/dev/breeze/src/airflow_breeze/params/build_ci_params.py +++ b/dev/breeze/src/airflow_breeze/params/build_ci_params.py @@ -37,6 +37,7 @@ class BuildCiParams(CommonBuildParams): airflow_pre_cached_pip_packages: bool = True force_build: bool = False eager_upgrade_additional_requirements: str = "" + skip_provider_dependencies_check: bool = False @property def airflow_version(self): diff --git a/dev/breeze/src/airflow_breeze/params/shell_params.py b/dev/breeze/src/airflow_breeze/params/shell_params.py index c8dfd9cd84ca..405fc7bcfb2a 100644 --- a/dev/breeze/src/airflow_breeze/params/shell_params.py +++ b/dev/breeze/src/airflow_breeze/params/shell_params.py @@ -125,6 +125,7 @@ class ShellParams: celery_flower: bool = False only_min_version_update: bool = False regenerate_missing_docs: bool = False + skip_provider_dependencies_check: bool = False def clone_with_test(self, test_type: str) -> ShellParams: new_params = deepcopy(self) diff --git a/dev/breeze/src/airflow_breeze/utils/md5_build_check.py b/dev/breeze/src/airflow_breeze/utils/md5_build_check.py index 4397fece4d87..54b46c99164c 100644 --- a/dev/breeze/src/airflow_breeze/utils/md5_build_check.py +++ b/dev/breeze/src/airflow_breeze/utils/md5_build_check.py @@ -20,11 +20,14 @@ from __future__ import annotations import hashlib +import os +import sys from pathlib import Path -from airflow_breeze.global_constants import FILES_FOR_REBUILD_CHECK +from airflow_breeze.global_constants import ALL_PROVIDER_YAML_FILES, FILES_FOR_REBUILD_CHECK from airflow_breeze.utils.console import get_console from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT +from 
airflow_breeze.utils.run_utils import run_command def check_md5checksum_in_cache_modified(file_hash: str, cache_path: Path, update: bool) -> bool: @@ -59,8 +62,19 @@ def generate_md5(filename, file_size: int = 65536): return hash_md5.hexdigest() +def check_md5_sum_for_file(file_to_check: str, md5sum_cache_dir: Path, update: bool): + file_to_get_md5 = AIRFLOW_SOURCES_ROOT / file_to_check + md5_checksum = generate_md5(file_to_get_md5) + sub_dir_name = file_to_get_md5.parts[-2] + actual_file_name = file_to_get_md5.parts[-1] + cache_file_name = Path(md5sum_cache_dir, sub_dir_name + "-" + actual_file_name + ".md5sum") + file_content = md5_checksum + " " + str(file_to_get_md5) + "\n" + is_modified = check_md5checksum_in_cache_modified(file_content, cache_file_name, update=update) + return is_modified + + def calculate_md5_checksum_for_files( - md5sum_cache_dir: Path, update: bool = False + md5sum_cache_dir: Path, update: bool = False, skip_provider_dependencies_check: bool = False ) -> tuple[list[str], list[str]]: """ Calculates checksums for all interesting files and stores the hashes in the md5sum_cache_dir. @@ -68,36 +82,64 @@ def calculate_md5_checksum_for_files( :param md5sum_cache_dir: directory where to store cached information :param update: whether to update the hashes + :param skip_provider_dependencies_check: whether to skip regeneration of the provider dependencies :return: Tuple of two lists: modified and not-modified files """ not_modified_files = [] modified_files = [] - for calculate_md5_file in FILES_FOR_REBUILD_CHECK: - file_to_get_md5 = AIRFLOW_SOURCES_ROOT / calculate_md5_file - md5_checksum = generate_md5(file_to_get_md5) - sub_dir_name = file_to_get_md5.parts[-2] - actual_file_name = file_to_get_md5.parts[-1] - cache_file_name = Path(md5sum_cache_dir, sub_dir_name + "-" + actual_file_name + ".md5sum") - file_content = md5_checksum + " " + str(file_to_get_md5) + "\n" - is_modified = check_md5checksum_in_cache_modified(file_content, cache_file_name, update=update) + if not skip_provider_dependencies_check: + modified_provider_yaml_files = [] + for file in ALL_PROVIDER_YAML_FILES: + # Only check provider yaml files once and save the result immediately. + # If we need to regenerate the dependencies and they are not modified then + # all is fine and we can save checksums for the new files + if check_md5_sum_for_file(file, md5sum_cache_dir, True): + modified_provider_yaml_files.append(file) + if modified_provider_yaml_files: + get_console().print( + "[info]Attempting to generate provider dependencies. 
" + "Provider yaml files changed since last check:[/]" + ) + get_console().print( + [os.fspath(file.relative_to(AIRFLOW_SOURCES_ROOT)) for file in modified_provider_yaml_files] + ) + # Regenerate provider_dependencies.json + run_command( + [ + sys.executable, + os.fspath( + AIRFLOW_SOURCES_ROOT + / "scripts" + / "ci" + / "pre_commit" + / "pre_commit_update_providers_dependencies.py" + ), + ], + cwd=AIRFLOW_SOURCES_ROOT, + ) + for file in FILES_FOR_REBUILD_CHECK: + is_modified = check_md5_sum_for_file(file, md5sum_cache_dir, update) if is_modified: - modified_files.append(calculate_md5_file) + modified_files.append(file) else: - not_modified_files.append(calculate_md5_file) + not_modified_files.append(file) return modified_files, not_modified_files -def md5sum_check_if_build_is_needed(md5sum_cache_dir: Path) -> bool: +def md5sum_check_if_build_is_needed(md5sum_cache_dir: Path, skip_provider_dependencies_check: bool) -> bool: """ Checks if build is needed based on whether important files were modified. :param md5sum_cache_dir: directory where cached md5 sums are stored + :param skip_provider_dependencies_check: whether to skip regeneration of the provider dependencies :return: True if build is needed. """ build_needed = False - modified_files, not_modified_files = calculate_md5_checksum_for_files(md5sum_cache_dir, update=False) - if len(modified_files) > 0: + modified_files, not_modified_files = calculate_md5_checksum_for_files( + md5sum_cache_dir, update=False, skip_provider_dependencies_check=skip_provider_dependencies_check + ) + if modified_files: get_console().print( f"[warning]The following important files are modified in {AIRFLOW_SOURCES_ROOT} " f"since last time image was built: [/]\n\n" diff --git a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py index 0c489bad63e3..c8ea48ec4845 100755 --- a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py +++ b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py @@ -209,12 +209,28 @@ def check_if_different_provider_used(file_path: Path) -> None: console.print("[red]Errors found during verification. Exiting!") console.print() sys.exit(1) - DEPENDENCIES_JSON_FILE_PATH.write_text(json.dumps(unique_sorted_dependencies, indent=2) + "\n") - console.print( - f"[yellow]If you see changes to the {DEPENDENCIES_JSON_FILE_PATH} file - " - f"do not modify the file manually. Let pre-commit do the job!" - ) - console.print() - console.print("[green]Verification complete! Success!\n") - console.print(f"Written {DEPENDENCIES_JSON_FILE_PATH}") - console.print() + old_dependencies = DEPENDENCIES_JSON_FILE_PATH.read_text() + new_dependencies = json.dumps(unique_sorted_dependencies, indent=2) + "\n" + if new_dependencies != old_dependencies: + DEPENDENCIES_JSON_FILE_PATH.write_text(json.dumps(unique_sorted_dependencies, indent=2) + "\n") + if os.environ.get("CI"): + console.print() + console.print(f"[info]Written {DEPENDENCIES_JSON_FILE_PATH}") + console.print( + f"[yellow]You will need to run breeze locally and commit " + f"{DEPENDENCIES_JSON_FILE_PATH.relative_to(AIRFLOW_SOURCES_ROOT)}!\n" + ) + console.print() + else: + console.print() + console.print( + f"[yellow]Regenerated new dependencies. 
Please commit " + f"{DEPENDENCIES_JSON_FILE_PATH.relative_to(AIRFLOW_SOURCES_ROOT)}!\n" + ) + console.print(f"[info]Written {DEPENDENCIES_JSON_FILE_PATH}") + console.print() + else: + console.print( + "[green]No need to regenerate dependencies!\n[/]" + f"The {DEPENDENCIES_JSON_FILE_PATH.relative_to(AIRFLOW_SOURCES_ROOT)} is up to date!\n" + ) From fdd1bc374cb9e29ea9201bf3211df4cfdd1d3519 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Wed, 23 Aug 2023 14:04:16 +0200 Subject: [PATCH 008/117] Upgrade botocore/aiobotocore minimum requirements (#33649) Botocore has a very peculiar process of releasing new version every day, which means that it gives `pip` hard time to figure what will be the non-conflicting set of packages when we have too low of a minium version set as requirement. Since we had > 1.24 that means that `pip` had to consider more than 340 versions for botocore, but also for related mypy packages and also a number of aiobotocore packages when resolving eager-upgrade. We limit all the relevant packages to 1.28 as minimum version now, and we should continue doing that regularly in the future. (cherry picked from commit 5f504e9a17353259e70bd3ed54f8edd2e465882c) --- Dockerfile.ci | 2 +- airflow/providers/amazon/provider.yaml | 24 ++++++++++++++------- generated/provider_dependencies.json | 11 +++++----- scripts/in_container/_in_container_utils.sh | 10 +++++++++ 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/Dockerfile.ci b/Dockerfile.ci index a7df2f8c11f6..ed6e6754b79c 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -1377,7 +1377,7 @@ RUN echo "Airflow version: ${AIRFLOW_VERSION}" # force them on the main Airflow package. Currently we need no extra limits as PIP 23.1+ has much better # dependency resolution and we do not need to limit the versions of the dependencies # aiobotocore is limited temporarily until it stops backtracking pip -ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="aiobotocore<2.6.0" +ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="" ARG UPGRADE_TO_NEWER_DEPENDENCIES="false" ARG VERSION_SUFFIX_FOR_PYPI="" diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index a0af095b405a..ea7edd1304d7 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -23,6 +23,8 @@ description: | suspended: false versions: + - 8.5.1 + - 8.5.0 - 8.4.0 - 8.3.1 - 8.3.0 @@ -65,7 +67,19 @@ dependencies: - apache-airflow>=2.4.0 - apache-airflow-providers-common-sql>=1.3.1 - apache-airflow-providers-http - - boto3>=1.24.0 + # We should update minimum version of boto3 and here regularly to avoid `pip` backtracking with the number + # of candidates to consider. We should also make sure that all the below related packages have also the + # same minimum version specified. Boto3 1.28.0 has been released on July 6 2023. We should also make sure we + # set it to the version that `aiobotocore` supports (see `aiobotocore` optional dependency at the end + # of this file). Currently we set aiobotocore as minimum 2.5.3 - as this is was the first version + # that supported boto3 1.28. NOTE!!! BOTOCORE VERSIONS ARE SHIFTED BY 3 MINOR VERSIONS + - boto3>=1.28.0 + - mypy-boto3-rds>=1.28.0 + - mypy-boto3-redshift-data>=1.28.0 + - mypy-boto3-s3>=1.28.0 + - mypy-boto3-appflow>=1.28.0 + # NOTE!!! 
BOTOCORE VERSIONS ARE SHIFTED BY 3 MINOR VERSIONS + - botocore>=1.31.0 - asgiref # watchtower 3 has been released end Jan and introduced breaking change across the board that might # change logging behaviour: @@ -75,13 +89,7 @@ dependencies: - jsonpath_ng>=1.5.3 - redshift_connector>=2.0.888 - sqlalchemy_redshift>=0.8.6 - - mypy-boto3-rds>=1.24.0 - - mypy-boto3-redshift-data>=1.24.0 - # exclude 1.28.12 and 1.28.15 as it causes strange typing inconsistency - # https://github.com/youtype/mypy_boto3_builder/issues/209 - - mypy-boto3-appflow>=1.24.0,<1.28.12 - asgiref - - mypy-boto3-s3>=1.24.0 integrations: - integration-name: Amazon Athena @@ -693,7 +701,7 @@ additional-extras: # boto3 have native async support and we move away from aio aiobotocore - name: aiobotocore dependencies: - - aiobotocore[boto3]>=2.2.0 + - aiobotocore[boto3]>=2.5.3 - name: cncf.kubernetes dependencies: - apache-airflow-providers-cncf-kubernetes>=7.2.0 diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index b029917e81e4..01d3302ffe0a 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -26,12 +26,13 @@ "apache-airflow>=2.4.0", "asgiref", "asgiref", - "boto3>=1.24.0", + "boto3>=1.28.0", + "botocore>=1.31.0", "jsonpath_ng>=1.5.3", - "mypy-boto3-appflow>=1.24.0,<1.28.12", - "mypy-boto3-rds>=1.24.0", - "mypy-boto3-redshift-data>=1.24.0", - "mypy-boto3-s3>=1.24.0", + "mypy-boto3-appflow>=1.28.0", + "mypy-boto3-rds>=1.28.0", + "mypy-boto3-redshift-data>=1.28.0", + "mypy-boto3-s3>=1.28.0", "redshift_connector>=2.0.888", "sqlalchemy_redshift>=0.8.6", "watchtower~=2.0.1" diff --git a/scripts/in_container/_in_container_utils.sh b/scripts/in_container/_in_container_utils.sh index 16663f47389c..288248b4eca4 100644 --- a/scripts/in_container/_in_container_utils.sh +++ b/scripts/in_container/_in_container_utils.sh @@ -297,6 +297,12 @@ function install_all_providers_from_pypi_with_eager_upgrade() { local res for provider_package in ${ALL_PROVIDERS_PACKAGES} do + # Until we release "yandex" provider with protobuf support we need to remove it from the list of providers + # to install, because it is impossible to find common requirements for already released yandex provider + # and current airflow + if [[ ${provider_package} == "apache-airflow-providers-yandex" ]]; then + continue + fi echo -n "Checking if ${provider_package} is available in PyPI: " res=$(curl --head -s -o /dev/null -w "%{http_code}" "https://pypi.org/project/${provider_package}/") if [[ ${res} == "200" ]]; then @@ -306,7 +312,11 @@ function install_all_providers_from_pypi_with_eager_upgrade() { echo "${COLOR_YELLOW}Skipped${COLOR_RESET}" fi done + + echo "Installing provider packages: ${packages_to_install[*]}" + + # we add eager requirements to make sure to take into account limitations that will allow us to # install all providers. We install only those packages that are available in PyPI - we might # Have some new providers in the works and they might not yet be simply available in PyPI From 45d228239a7c7f0c4ac43a33102979b414158563 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sat, 26 Aug 2023 10:17:18 +0200 Subject: [PATCH 009/117] Limit hive provider check for Python 3.11 temporarily (#33774) In order to generate constraints, we need to temporarily limit also hive provider. There is a gap between wnen we added it in airflow setup and when we can generate constraints for the released providers from PyPI - we need to release the provider similarly like we have to do it for yandex. 
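The idea behind such temporary exclusions can be pictured with a purely
illustrative Python sketch (the exclusion map and helper name are made up
for illustration; the real change is the few lines of bash in the diff
below):

    # Hypothetical sketch only - skip providers that cannot be installed for
    # the current Python version before installing them to generate constraints.
    from __future__ import annotations

    import sys

    # Example exclusion map: provider package name -> Python versions to skip.
    EXCLUDED_PROVIDERS = {
        "apache-airflow-providers-apache-hive": {"3.11"},
    }

    def providers_to_install(all_provider_packages: list[str]) -> list[str]:
        """Return only the provider packages installable for this Python version."""
        python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
        return [
            package
            for package in all_provider_packages
            if python_version not in EXCLUDED_PROVIDERS.get(package, set())
        ]
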
Therefore - until the upcoming hive provider is released (in 3 days) - we
need to exclude hive from Python 3.11 constraint generation for providers
installed from PyPI.

(cherry picked from commit 984ba22e6e2a8c70b780d11c0bbaeb1dc0d6f14d)
---
 scripts/in_container/_in_container_utils.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/in_container/_in_container_utils.sh b/scripts/in_container/_in_container_utils.sh
index 288248b4eca4..2ed267dd543c 100644
--- a/scripts/in_container/_in_container_utils.sh
+++ b/scripts/in_container/_in_container_utils.sh
@@ -303,6 +303,12 @@ function install_all_providers_from_pypi_with_eager_upgrade() {
         if [[ ${provider_package} == "apache-airflow-providers-yandex" ]]; then
             continue
         fi
+        # Until we release latest `hive` provider with pure-sasl support, we need to remove it from the
+        # list of providers to install for Python 3.11 because we cannot build sasl it for Python 3.11
+        if [[ ${provider_package} == "apache-airflow-providers-apache-hive" \
+            && ${PYTHON_MAJOR_MINOR_VERSION} == "3.11" ]]; then
+            continue
+        fi
         echo -n "Checking if ${provider_package} is available in PyPI: "
         res=$(curl --head -s -o /dev/null -w "%{http_code}" "https://pypi.org/project/${provider_package}/")
         if [[ ${res} == "200" ]]; then

From 20bfcafa31451fe81d9a7e65794e5ea76573f7fb Mon Sep 17 00:00:00 2001
From: Jarek Potiuk
Date: Sat, 26 Aug 2023 18:02:08 +0200
Subject: [PATCH 010/117] Limit Redis dependencies to match celery limits (#33773)

Redis 5, released last week, breaks celery. Celery is limiting it for now
and will resolve the incompatibility later; we should similarly limit redis
on our side for users who will not upgrade to the celery version that will
be released shortly.

Fixes: #33744
(cherry picked from commit 3ba994d8f4c4b5ce3828bebcff28bbfc25170004)
---
 airflow/providers/redis/provider.yaml | 6 +++++-
 generated/provider_dependencies.json  | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/airflow/providers/redis/provider.yaml b/airflow/providers/redis/provider.yaml
index 2169ffdc9c35..6ae19910a969 100644
--- a/airflow/providers/redis/provider.yaml
+++ b/airflow/providers/redis/provider.yaml
@@ -38,7 +38,11 @@ versions:
 
 dependencies:
   - apache-airflow>=2.4.0
-  - redis>=3.2.0
+  # We limit redis to <5.0.0 because of incompatibility with celery. Both Celery and Kombu limited it
+  # and deferred fixing it for later, we should bump the limit once they do. Also !=4.5.5 matches celery
+  # limits and prevents installing 4.5.5 which is broken.
+  # https://github.com/celery/celery/pull/8442, https://github.com/celery/kombu/pull/1776
+  - redis>=4.5.2,<5.0.0,!=4.5.5
 
 integrations:
   - integration-name: Redis
diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json
index 01d3302ffe0a..e2b1513f7e7e 100644
--- a/generated/provider_dependencies.json
+++ b/generated/provider_dependencies.json
@@ -750,7 +750,7 @@
   "redis": {
     "deps": [
       "apache-airflow>=2.4.0",
-      "redis>=3.2.0"
+      "redis>=4.5.2,<5.0.0,!=4.5.5"
     ],
     "cross-providers-deps": [],
     "excluded-python-versions": []

From d39abde8252bf5bb2e6c8c391aaee652e3c87882 Mon Sep 17 00:00:00 2001
From: Jarek Potiuk
Date: Sun, 27 Aug 2023 00:14:18 +0200
Subject: [PATCH 011/117] Remove "eager upgrade" from PROD image completely (#33784)

There were still some left-overs of EAGER_UPGRADE in PROD image building.
However, "eager upgrade" only makes sense for CI images; PROD images, when
being built, should reuse the constraints produced during the CI image
build step.
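In practice the constraint selection can be pictured with a purely
illustrative Python sketch (assumed names and a placeholder fallback URL;
the real logic is the shell function changed in the Dockerfile further
below, and the new flag it corresponds to is described in the list that
follows):

    # Hypothetical sketch only - prefer a constraints file shipped in
    # docker-context-files, otherwise fall back to constraints on GitHub.
    from __future__ import annotations

    import sys
    from pathlib import Path

    def constraint_args(
        use_constraints_for_context_packages: bool,
        constraints_mode: str = "constraints-source-providers",
        fallback_constraints_url: str = "https://example.com/constraints.txt",
    ) -> list[str]:
        """Extra `pip install` arguments for packages from docker-context-files."""
        if not use_constraints_for_context_packages:
            # Default behaviour: install the context packages without constraints.
            return []
        python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
        local_constraints = Path(
            f"/docker-context-files/constraints-{python_version}/"
            f"{constraints_mode}-{python_version}.txt"
        )
        if local_constraints.is_file():
            # Constraints uploaded by the CI image build (e.g. after an eager upgrade).
            return ["--constraint", str(local_constraints)]
        # No local constraints present - use the ones published on GitHub.
        return ["--constraint", fallback_constraints_url]
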
This PR does the following:

* removes the eager upgrade parameters from the PROD image
* instead, the PROD image build has a new flag for installing packages from
  the context: --use-constraints-for-context-packages, which will
  automatically use constraints from "docker-context-files" if they are
  present there
* modifies the CI workflows to upload constraints as artifacts and download
  them for the PROD image build when "eager upgrade" has been used,
  directing it to use "source" constraints
* adds back support for the "upgrade to newer dependencies" label, which
  makes it easy to test "eager upgrade"

As a result, when the PROD image is built in CI:

* when a regular PR is run, it will use the latest GitHub "source" constraints
* when an "eager upgrade" PR is run, it will use the eager-upgrade
  constraints that were generated during the CI build

(cherry picked from commit 2b1a1944aa5cc84c94df523309eea30890d4fb74)
---
 .github/actions/build-ci-images/action.yml | 6 +
 .github/actions/build-prod-images/action.yml | 9 +-
 Dockerfile | 52 +++++-
 .../commands/ci_image_commands.py | 2 +
 .../commands/ci_image_commands_config.py | 1 +
 .../commands/production_image_commands.py | 73 +++-----
 .../production_image_commands_config.py | 4 +-
 .../src/airflow_breeze/global_constants.py | 1 +
 .../airflow_breeze/params/build_ci_params.py | 5 +-
 .../params/build_prod_params.py | 7 +-
 .../params/common_build_params.py | 14 +-
 .../airflow_breeze/utils/common_options.py | 9 +
 .../airflow_breeze/utils/selective_checks.py | 9 +-
 dev/breeze/tests/test_selective_checks.py | 21 ++-
 images/breeze/output-commands-hash.txt | 8 +-
 images/breeze/output_build-docs.svg | 120 ++++++------
 images/breeze/output_ci-image.svg | 24 +--
 images/breeze/output_ci-image_build.svg | 174 +++++++++---------
 images/breeze/output_k8s.svg | 58 +++---
 images/breeze/output_k8s_deploy-airflow.svg | 68 +++----
 .../breeze/output_k8s_run-complete-tests.svg | 88 ++++-----
 images/breeze/output_prod-image_build.svg | 126 ++++++-------
 images/breeze/output_release-management.svg | 58 +++---
 ...release-management_add-back-references.svg | 32 ++--
 ...e-management_install-provider-packages.svg | 74 ++++----
 ..._release-management_update-constraints.svg | 50 +++--
 ...se-management_verify-provider-packages.svg | 60 +++---
 images/breeze/output_sbom.svg | 20 +-
 ...ut_sbom_generate-provider-requirements.svg | 54 +++---
 images/breeze/output_setup.svg | 32 ++--
 images/breeze/output_setup_config.svg | 40 ++--
 images/breeze/output_shell.svg | 124 ++++++------
 images/breeze/output_start-airflow.svg | 128 ++++++------
 images/breeze/output_static-checks.svg | 146 +++++++--------
 images/breeze/output_testing.svg | 24 +--
 .../output_testing_integration-tests.svg | 66 +++---
 images/breeze/output_testing_tests.svg | 114 ++++++------
 .../install_from_docker_context_files.sh | 45 ++++-
 38 files changed, 1020 insertions(+), 926 deletions(-)

diff --git a/.github/actions/build-ci-images/action.yml b/.github/actions/build-ci-images/action.yml
index ed613c29ce0c..d43a42528449 100644
--- a/.github/actions/build-ci-images/action.yml
+++ b/.github/actions/build-ci-images/action.yml
@@ -48,6 +48,12 @@ runs:
         cat "files/constraints-${PYTHON_VERSION}/*.md" >> $GITHUB_STEP_SUMMARY || true
       done
     if: env.UPGRADE_TO_NEWER_DEPENDENCIES != 'false'
+  - name: "Upload constraint artifacts"
+    uses: actions/upload-artifact@v3
+    with:
+      name: constraints
+      path: ./files/constraints-*/constraints-*.txt
+      retention-days: 7
   - name: "Fix ownership"
     shell: bash
     run: breeze ci fix-ownership
diff --git a/.github/actions/build-prod-images/action.yml 
b/.github/actions/build-prod-images/action.yml index 0086345b6977..feac8c2ef270 100644 --- a/.github/actions/build-prod-images/action.yml +++ b/.github/actions/build-prod-images/action.yml @@ -56,11 +56,18 @@ runs: - name: "Move dist packages to docker-context files" shell: bash run: mv -v ./dist/*.whl ./docker-context-files + - name: "Download constraints from the CI build" + uses: actions/download-artifact@v3 + with: + name: constraints + path: ./docker-context-files + if: env.UPGRADE_TO_NEWER_DEPENDENCIES != 'false' - name: "Build & Push PROD images ${{ env.IMAGE_TAG }}:${{ env.PYTHON_VERSIONS }}" shell: bash run: > breeze prod-image build --tag-as-latest --run-in-parallel --push - --install-packages-from-context --upgrade-on-failure + --install-packages-from-context --airflow-constraints-mode constraints-source-providers + --use-constraints-for-context-packages env: COMMIT_SHA: ${{ github.sha }} - name: "Fix ownership" diff --git a/Dockerfile b/Dockerfile index 5e09803b7ddf..ba460d49400f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -586,17 +586,42 @@ function install_airflow_and_providers_from_docker_context_files(){ return fi - echo - echo "${COLOR_BLUE}Force re-installing airflow and providers from local files with eager upgrade${COLOR_RESET}" - echo - # force reinstall all airflow + provider package local files with eager upgrade - set -x - pip install "${pip_flags[@]}" --root-user-action ignore --upgrade --upgrade-strategy eager \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} \ - ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} - set +x + if [[ ${USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES=} == "true" ]]; then + local python_version + python_version=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') + local local_constraints_file=/docker-context-files/constraints-"${python_version}"/${AIRFLOW_CONSTRAINTS_MODE}-"${python_version}".txt + if [[ -f "${local_constraints_file}" ]]; then + echo + echo "${COLOR_BLUE}Installing docker-context-files packages with constraints found in ${local_constraints_file}${COLOR_RESET}" + echo + # force reinstall all airflow + provider packages with constraints found in + set -x + pip install "${pip_flags[@]}" --root-user-action ignore --upgrade \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} --constraint "${local_constraints_file}" \ + ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} + set +x + else + echo + echo "${COLOR_BLUE}Installing docker-context-files packages with constraints from GitHub${COLOR_RESET}" + echo + set -x + pip install "${pip_flags[@]}" --root-user-action ignore \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" \ + ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} + set +x + fi + else + echo + echo "${COLOR_BLUE}Installing docker-context-files packages without constraints${COLOR_RESET}" + echo + set -x + pip install "${pip_flags[@]}" --root-user-action ignore \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} + set +x + fi common::install_pip_version pip check } @@ -1269,6 +1294,12 @@ COPY --from=scripts common.sh install_pip_version.sh \ # is installed from docker-context files rather than from PyPI) ARG INSTALL_PACKAGES_FROM_CONTEXT="false" +# Normally constraints are not used when context packages are build - because we might have packages +# that are 
conflicting with Airflow constraints, however there are cases when we want to use constraints +# for example in CI builds when we already have source-package constraints - either from github branch or +# from eager-upgraded constraints by the CI builds +ARG USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES="false" + # In case of Production build image segment we want to pre-install main version of airflow # dependencies from GitHub so that we do not have to always reinstall it from the scratch. # The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false") @@ -1293,6 +1324,7 @@ ARG VERSION_SUFFIX_FOR_PYPI="" ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS} \ INSTALL_PACKAGES_FROM_CONTEXT=${INSTALL_PACKAGES_FROM_CONTEXT} \ + USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES=${USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES} \ VERSION_SUFFIX_FOR_PYPI=${VERSION_SUFFIX_FOR_PYPI} WORKDIR ${AIRFLOW_HOME} diff --git a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py index 2eecbfff3ca8..9d343ed57194 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py @@ -43,6 +43,7 @@ option_airflow_constraints_mode_ci, option_airflow_constraints_reference_build, option_answer, + option_build_progress, option_build_timeout_minutes, option_builder, option_commit_sha, @@ -226,6 +227,7 @@ def kill_process_group(build_process_group_id: int): @option_additional_dev_apt_command @option_additional_dev_apt_env @option_builder +@option_build_progress @option_build_timeout_minutes @option_commit_sha @option_dev_apt_command diff --git a/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py b/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py index 58509c3c5798..6e88b70abe11 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py @@ -36,6 +36,7 @@ "--tag-as-latest", "--docker-cache", "--force-build", + "--build-progress", ], }, { diff --git a/dev/breeze/src/airflow_breeze/commands/production_image_commands.py b/dev/breeze/src/airflow_breeze/commands/production_image_commands.py index b9dc45d681de..6f3e9eeced2b 100644 --- a/dev/breeze/src/airflow_breeze/commands/production_image_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/production_image_commands.py @@ -40,6 +40,7 @@ option_airflow_constraints_location, option_airflow_constraints_mode_prod, option_airflow_constraints_reference_build, + option_build_progress, option_builder, option_commit_sha, option_debug_resources, @@ -68,8 +69,6 @@ option_runtime_apt_deps, option_skip_cleanup, option_tag_as_latest, - option_upgrade_on_failure, - option_upgrade_to_newer_dependencies, option_verbose, option_verify, option_version_suffix_for_pypi, @@ -157,8 +156,6 @@ def prod_image(): @option_debug_resources @option_include_success_outputs @option_python_versions -@option_upgrade_to_newer_dependencies -@option_upgrade_on_failure @option_platform_multiple @option_github_token @option_docker_cache @@ -179,6 +176,12 @@ def prod_image(): "Implies --disable-airflow-repo-cache.", is_flag=True, ) +@click.option( + "--use-constraints-for-context-packages", + help="Uses constraints for context packages installation - " + "either from constraints store in docker-context-files or from github.", + is_flag=True, +) @click.option( "--cleanup-context", help="Clean up docker context files before running build (cannot be used 
together" @@ -214,6 +217,7 @@ def prod_image(): @option_additional_runtime_apt_env @option_additional_runtime_apt_command @option_builder +@option_build_progress @option_dev_apt_command @option_dev_apt_deps @option_python_image @@ -435,26 +439,27 @@ def check_docker_context_files(install_packages_from_context: bool): :param install_packages_from_context: whether we want to install from docker-context-files """ - context_file = DOCKER_CONTEXT_DIR.glob("**/*") - number_of_context_files = len( - [context for context in context_file if context.is_file() and context.name != ".README.md"] + context_file = DOCKER_CONTEXT_DIR.rglob("*") + any_context_files = any( + context.is_file() + and context.name not in (".README.md", ".DS_Store") + and not context.parent.name.startswith("constraints") + for context in context_file ) - if number_of_context_files == 0: - if install_packages_from_context: - get_console().print("[warning]\nERROR! You want to install packages from docker-context-files") - get_console().print("[warning]\n but there are no packages to install in this folder.") - sys.exit(1) - else: - if not install_packages_from_context: - get_console().print( - "[warning]\n ERROR! There are some extra files in docker-context-files except README.md" - ) - get_console().print("[warning]\nAnd you did not choose --install-packages-from-context flag") - get_console().print( - "[warning]\nThis might result in unnecessary cache invalidation and long build times" - ) - get_console().print("[warning]Please restart the command with --cleanup-context switch\n") - sys.exit(1) + if not any_context_files and install_packages_from_context: + get_console().print("[warning]\nERROR! You want to install packages from docker-context-files") + get_console().print("[warning]\n but there are no packages to install in this folder.") + sys.exit(1) + elif any_context_files and not install_packages_from_context: + get_console().print( + "[warning]\n ERROR! 
There are some extra files in docker-context-files except README.md" + ) + get_console().print("[warning]\nAnd you did not choose --install-packages-from-context flag") + get_console().print( + "[warning]\nThis might result in unnecessary cache invalidation and long build times" + ) + get_console().print("[warning]Please restart the command with --cleanup-context switch\n") + sys.exit(1) def run_build_production_image( @@ -507,24 +512,6 @@ def run_build_production_image( text=True, output=output, ) - if ( - build_command_result.returncode != 0 - and prod_image_params.upgrade_on_failure - and not prod_image_params.upgrade_to_newer_dependencies - ): - prod_image_params.upgrade_to_newer_dependencies = True - get_console().print("[warning]Attempting to build with upgrade_to_newer_dependencies on failure") - build_command_result = run_command( - prepare_docker_build_command( - image_params=prod_image_params, - ), - cwd=AIRFLOW_SOURCES_ROOT, - check=False, - text=True, - env=env, - output=output, - ) - if build_command_result.returncode == 0: - if prod_image_params.tag_as_latest: - build_command_result = tag_image_as_latest(image_params=prod_image_params, output=output) + if build_command_result.returncode == 0 and prod_image_params.tag_as_latest: + build_command_result = tag_image_as_latest(image_params=prod_image_params, output=output) return build_command_result.returncode, f"Image build: {prod_image_params.python}" diff --git a/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py b/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py index 148788afc485..8ffc63652163 100644 --- a/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py @@ -31,11 +31,10 @@ "options": [ "--python", "--install-airflow-version", - "--upgrade-to-newer-dependencies", - "--upgrade-on-failure", "--image-tag", "--tag-as-latest", "--docker-cache", + "--build-progress", ], }, { @@ -79,6 +78,7 @@ "name": "Customization options (for specific customization needs)", "options": [ "--install-packages-from-context", + "--use-constraints-for-context-packages", "--cleanup-context", "--disable-mysql-client-installation", "--disable-mssql-client-installation", diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index d79a6561d2c4..2d4870c0585b 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -135,6 +135,7 @@ def all_helm_test_packages() -> list[str]: ALLOWED_INSTALLATION_PACKAGE_FORMATS = ["wheel", "sdist"] ALLOWED_INSTALLATION_METHODS = [".", "apache-airflow"] ALLOWED_BUILD_CACHE = ["registry", "local", "disabled"] +ALLOWED_BUILD_PROGRESS = ["auto", "plain", "tty"] MULTI_PLATFORM = "linux/amd64,linux/arm64" SINGLE_PLATFORMS = ["linux/amd64", "linux/arm64"] ALLOWED_PLATFORMS = [*SINGLE_PLATFORMS, MULTI_PLATFORM] diff --git a/dev/breeze/src/airflow_breeze/params/build_ci_params.py b/dev/breeze/src/airflow_breeze/params/build_ci_params.py index 4ad0b82789b8..74a6d5bb6471 100644 --- a/dev/breeze/src/airflow_breeze/params/build_ci_params.py +++ b/dev/breeze/src/airflow_breeze/params/build_ci_params.py @@ -36,6 +36,8 @@ class BuildCiParams(CommonBuildParams): airflow_extras: str = "devel_ci" airflow_pre_cached_pip_packages: bool = True force_build: bool = False + upgrade_to_newer_dependencies: bool = False + upgrade_on_failure: bool = False 
eager_upgrade_additional_requirements: str = "" skip_provider_dependencies_check: bool = False @@ -66,7 +68,7 @@ def extra_docker_build_flags(self) -> list[str]: f"EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS={eager_upgrade_arg}", ] ) - return extra_ci_flags + return super().extra_docker_build_flags + extra_ci_flags @property def md5sum_cache_dir(self) -> Path: @@ -111,6 +113,7 @@ def optional_image_args(self) -> list[str]: "additional_python_deps", "version_suffix_for_pypi", "commit_sha", + "build_progress", ] def __post_init__(self): diff --git a/dev/breeze/src/airflow_breeze/params/build_prod_params.py b/dev/breeze/src/airflow_breeze/params/build_prod_params.py index 92174451c373..96c3a9397c9e 100644 --- a/dev/breeze/src/airflow_breeze/params/build_prod_params.py +++ b/dev/breeze/src/airflow_breeze/params/build_prod_params.py @@ -52,6 +52,7 @@ class BuildProdParams(CommonBuildParams): install_airflow_reference: str = "" install_airflow_version: str = "" install_packages_from_context: bool = False + use_constraints_for_context_packages: bool = False installation_method: str = "." runtime_apt_command: str = "" runtime_apt_deps: str = "" @@ -159,7 +160,6 @@ def extra_docker_build_flags(self) -> list[str]: f"AIRFLOW_CONSTRAINTS_REFERENCE={self.airflow_constraints_reference}", ] ) - maintainers = json.dumps([{"name": "Apache Airflow PMC", "email": "dev@airflow.apache.org"}]) logo_url = "https://github.com/apache/airflow/raw/main/docs/apache-airflow/img/logos/wordmark_1.png" readme_url = "https://raw.githubusercontent.com/apache/airflow/main/docs/docker-stack/README.md" @@ -175,7 +175,7 @@ def extra_docker_build_flags(self) -> list[str]: f"io.artifacthub.package.logo-url={logo_url}", ] ) - return extra_build_flags + return super().extra_docker_build_flags + extra_build_flags @property def airflow_pre_cached_pip_packages(self) -> str: @@ -221,7 +221,6 @@ def required_image_args(self) -> list[str]: "install_postgres_client", "install_providers_from_sources", "python_base_image", - "upgrade_to_newer_dependencies", ] @property @@ -242,4 +241,6 @@ def optional_image_args(self) -> list[str]: "runtime_apt_deps", "version_suffix_for_pypi", "commit_sha", + "build_progress", + "use_constraints_for_context_packages", ] diff --git a/dev/breeze/src/airflow_breeze/params/common_build_params.py b/dev/breeze/src/airflow_breeze/params/common_build_params.py index 90874f67b33b..fc1de2a4ed2d 100644 --- a/dev/breeze/src/airflow_breeze/params/common_build_params.py +++ b/dev/breeze/src/airflow_breeze/params/common_build_params.py @@ -22,7 +22,11 @@ from datetime import datetime from airflow_breeze.branch_defaults import AIRFLOW_BRANCH, DEFAULT_AIRFLOW_CONSTRAINTS_BRANCH -from airflow_breeze.global_constants import APACHE_AIRFLOW_GITHUB_REPOSITORY, DOCKER_DEFAULT_PLATFORM +from airflow_breeze.global_constants import ( + ALLOWED_BUILD_PROGRESS, + APACHE_AIRFLOW_GITHUB_REPOSITORY, + DOCKER_DEFAULT_PLATFORM, +) from airflow_breeze.utils.console import get_console from airflow_breeze.utils.platforms import get_real_platform @@ -46,6 +50,7 @@ class CommonBuildParams: airflow_constraints_location: str = "" build_id: int = 0 builder: str = "autodetect" + build_progress: str = ALLOWED_BUILD_PROGRESS[0] constraints_github_repository: str = APACHE_AIRFLOW_GITHUB_REPOSITORY commit_sha: str = "" dev_apt_command: str = "" @@ -62,8 +67,6 @@ class CommonBuildParams: push: bool = False python: str = "3.8" tag_as_latest: bool = False - upgrade_to_newer_dependencies: bool = False - upgrade_on_failure: bool = False dry_run: bool = False 
version_suffix_for_pypi: str = "" verbose: bool = False @@ -96,7 +99,10 @@ def airflow_image_name(self): @property def extra_docker_build_flags(self) -> list[str]: - raise NotImplementedError() + extra_flass = [] + if self.build_progress: + extra_flass.append(f"--progress={self.build_progress}") + return extra_flass @property def docker_cache_directive(self) -> list[str]: diff --git a/dev/breeze/src/airflow_breeze/utils/common_options.py b/dev/breeze/src/airflow_breeze/utils/common_options.py index 92f2bf2fdaad..f1697790166d 100644 --- a/dev/breeze/src/airflow_breeze/utils/common_options.py +++ b/dev/breeze/src/airflow_breeze/utils/common_options.py @@ -25,6 +25,7 @@ ALL_HISTORICAL_PYTHON_VERSIONS, ALLOWED_BACKENDS, ALLOWED_BUILD_CACHE, + ALLOWED_BUILD_PROGRESS, ALLOWED_CELERY_BROKERS, ALLOWED_CONSTRAINTS_MODES_CI, ALLOWED_CONSTRAINTS_MODES_PROD, @@ -499,6 +500,14 @@ def _set_default_from_parent(ctx: click.core.Context, option: click.core.Option, show_default=True, default="autodetect", ) +option_build_progress = click.option( + "--build-progress", + help="Build progress.", + type=BetterChoice(ALLOWED_BUILD_PROGRESS), + envvar="BUILD_PROGRESS", + show_default=True, + default=ALLOWED_BUILD_PROGRESS[0], +) option_include_success_outputs = click.option( "--include-success-outputs", help="Whether to include outputs of successful parallel runs (skipped by default).", diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py b/dev/breeze/src/airflow_breeze/utils/selective_checks.py index 6f9fc51d8826..18e8bafa1c64 100644 --- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py +++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py @@ -68,6 +68,7 @@ FULL_TESTS_NEEDED_LABEL = "full tests needed" DEBUG_CI_RESOURCES_LABEL = "debug ci resources" USE_PUBLIC_RUNNERS_LABEL = "use public runners" +UPGRADE_TO_NEWER_DEPENDENCIES_LABEL = "upgrade to newer dependencies" class FileGroupForCi(Enum): @@ -696,9 +697,11 @@ def basic_checks_only(self) -> bool: @cached_property def upgrade_to_newer_dependencies(self) -> bool: - return len( - self._matching_files(FileGroupForCi.SETUP_FILES, CI_FILE_GROUP_MATCHES) - ) > 0 or self._github_event in [GithubEvents.PUSH, GithubEvents.SCHEDULE] + return ( + len(self._matching_files(FileGroupForCi.SETUP_FILES, CI_FILE_GROUP_MATCHES)) > 0 + or self._github_event in [GithubEvents.PUSH, GithubEvents.SCHEDULE] + or UPGRADE_TO_NEWER_DEPENDENCIES_LABEL in self._pr_labels + ) @cached_property def docs_filter_list_as_string(self) -> str | None: diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index 6e31ee6aaa2a..265d3331cda8 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -899,13 +899,14 @@ def test_no_commit_provided_trigger_full_build_for_any_event_type(github_event): @pytest.mark.parametrize( - "files, expected_outputs,", + "files, expected_outputs, pr_labels", [ pytest.param( ("airflow/models/dag.py",), { "upgrade-to-newer-dependencies": "false", }, + (), id="Regular source changed", ), pytest.param( @@ -913,6 +914,7 @@ def test_no_commit_provided_trigger_full_build_for_any_event_type(github_event): { "upgrade-to-newer-dependencies": "true", }, + (), id="Setup.py changed", ), pytest.param( @@ -920,6 +922,7 @@ def test_no_commit_provided_trigger_full_build_for_any_event_type(github_event): { "upgrade-to-newer-dependencies": "true", }, + (), id="Setup.cfg changed", ), pytest.param( @@ -927,6 +930,7 @@ def 
test_no_commit_provided_trigger_full_build_for_any_event_type(github_event): { "upgrade-to-newer-dependencies": "false", }, + (), id="Provider.yaml changed", ), pytest.param( @@ -934,17 +938,28 @@ def test_no_commit_provided_trigger_full_build_for_any_event_type(github_event): { "upgrade-to-newer-dependencies": "true", }, + (), id="Generated provider_dependencies changed", ), + pytest.param( + ("airflow/models/dag.py",), + { + "upgrade-to-newer-dependencies": "true", + }, + ("upgrade to newer dependencies",), + id="Regular source changed", + ), ], ) -def test_upgrade_to_newer_dependencies(files: tuple[str, ...], expected_outputs: dict[str, str]): +def test_upgrade_to_newer_dependencies( + files: tuple[str, ...], expected_outputs: dict[str, str], pr_labels: tuple[str] +): stderr = SelectiveChecks( files=files, commit_ref="HEAD", github_event=GithubEvents.PULL_REQUEST, - pr_labels=(), default_branch="main", + pr_labels=pr_labels, ) assert_outputs_are_printed(expected_outputs, str(stderr)) diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt index ece518a8740d..fbed727ee148 100644 --- a/images/breeze/output-commands-hash.txt +++ b/images/breeze/output-commands-hash.txt @@ -10,10 +10,10 @@ ci:get-workflow-info:8246038093359b9c3c110043419473e2 ci:resource-check:bfcca92f18a403ca630955074eb5e9ad ci:selective-check:6657ed5d42affb7264b5efcc86f17a2a ci:5315c29bd9f68725ef92e4db8aff5cda -ci-image:build:93537e0c8f99923fe9e958884c36af38 +ci-image:build:96ed67d4e1008b3860e20faccfad1069 ci-image:pull:7f14482a588f018f76df84719e77723f ci-image:verify:c90dc7e20fce2351eb89d8d1ebbd35e7 -ci-image:1b912129939634c0e67f648e7a854998 +ci-image:ecf4756b5c00d0574b660d3b27658c2a cleanup:8d92d453a6700f6d8cb11fb6a8b50461 compile-www-assets:0963f1409f0aa1e3b137cddd4cc52e87 down:4580f5b3b178ea00182694f134a751f3 @@ -32,10 +32,10 @@ k8s:status:1529ccd444b41c4b0b5f943289957100 k8s:tests:2a1e2928faea2eddafaff94176a46690 k8s:upload-k8s-image:6b3a20cdeb692f3c3d727f6b9e68c901 k8s:8c1e4287deb0533a74f3b302f9c574be -prod-image:build:7b971535fd4a1b93bebacd58b52b073a +prod-image:build:789f32a07099033b49936d4cfbeb5322 prod-image:pull:76f1f27e6119928412abecf153fce4bb prod-image:verify:bd2b78738a7c388dbad6076c41a9f906 -prod-image:6877cb974df8918504234536f6a35886 +prod-image:2946a9cee30c8ad7bd738ea3c77bff55 release-management:add-back-references:0d4eb5ed82e5381bc630b343ba605a72 release-management:create-minor-branch:a3834afc4aa5d1e98002c9e9e7a9931d release-management:generate-constraints:b8fcaf8f0acd35ed5dbd48659bdb6485 diff --git a/images/breeze/output_build-docs.svg b/images/breeze/output_build-docs.svg index fdec4306b09e..3cad665ae03a 100644 --- a/images/breeze/output_build-docs.svg +++ b/images/breeze/output_build-docs.svg @@ -35,8 +35,8 @@ .breeze-build-docs-r1 { fill: #c5c8c6;font-weight: bold } .breeze-build-docs-r2 { fill: #c5c8c6 } .breeze-build-docs-r3 { fill: #d0b344;font-weight: bold } -.breeze-build-docs-r4 { fill: #68a0b3;font-weight: bold } -.breeze-build-docs-r5 { fill: #868887 } +.breeze-build-docs-r4 { fill: #868887 } +.breeze-build-docs-r5 { fill: #68a0b3;font-weight: bold } .breeze-build-docs-r6 { fill: #98a84b;font-weight: bold } .breeze-build-docs-r7 { fill: #8d7b39 } @@ -241,67 +241,67 @@ -Usage: breeze build-docs [OPTIONS] +Usage: breeze build-docs [OPTIONS] Build documents. -╭─ Doc flags ──────────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---docs-only-dOnly build documentation. 
---spellcheck-only-sOnly run spell checking. ---clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx artifacts     -before the build - useful for a clean build.                                                ---one-pass-onlyBuilds documentation in one pass only. This is useful for debugging sphinx errors. ---package-filterList of packages to consider.                                                               -(apache-airflow | apache-airflow-providers-airbyte | apache-airflow-providers-alibaba |     -apache-airflow-providers-amazon | apache-airflow-providers-apache-beam |                    -apache-airflow-providers-apache-cassandra | apache-airflow-providers-apache-drill |         -apache-airflow-providers-apache-druid | apache-airflow-providers-apache-flink |             -apache-airflow-providers-apache-hdfs | apache-airflow-providers-apache-hive |               -apache-airflow-providers-apache-impala | apache-airflow-providers-apache-kafka |            -apache-airflow-providers-apache-kylin | apache-airflow-providers-apache-livy |              -apache-airflow-providers-apache-pig | apache-airflow-providers-apache-pinot |               -apache-airflow-providers-apache-spark | apache-airflow-providers-apache-sqoop |             -apache-airflow-providers-apprise | apache-airflow-providers-arangodb |                      -apache-airflow-providers-asana | apache-airflow-providers-atlassian-jira |                  -apache-airflow-providers-celery | apache-airflow-providers-cloudant |                       -apache-airflow-providers-cncf-kubernetes | apache-airflow-providers-common-sql |            -apache-airflow-providers-daskexecutor | apache-airflow-providers-databricks |               -apache-airflow-providers-datadog | apache-airflow-providers-dbt-cloud |                     -apache-airflow-providers-dingding | apache-airflow-providers-discord |                      -apache-airflow-providers-docker | apache-airflow-providers-elasticsearch |                  -apache-airflow-providers-exasol | apache-airflow-providers-facebook |                       -apache-airflow-providers-ftp | apache-airflow-providers-github |                            -apache-airflow-providers-google | apache-airflow-providers-grpc |                           -apache-airflow-providers-hashicorp | apache-airflow-providers-http |                        -apache-airflow-providers-imap | apache-airflow-providers-influxdb |                         -apache-airflow-providers-jdbc | apache-airflow-providers-jenkins |                          -apache-airflow-providers-microsoft-azure | apache-airflow-providers-microsoft-mssql |       -apache-airflow-providers-microsoft-psrp | apache-airflow-providers-microsoft-winrm |        -apache-airflow-providers-mongo | apache-airflow-providers-mysql |                           -apache-airflow-providers-neo4j | apache-airflow-providers-odbc |                            -apache-airflow-providers-openfaas | apache-airflow-providers-openlineage |                  -apache-airflow-providers-opsgenie | apache-airflow-providers-oracle |                       -apache-airflow-providers-pagerduty | apache-airflow-providers-papermill |                   -apache-airflow-providers-plexus | apache-airflow-providers-postgres |                       -apache-airflow-providers-presto | apache-airflow-providers-qubole |                         -apache-airflow-providers-redis | apache-airflow-providers-salesforce |                      -apache-airflow-providers-samba | 
apache-airflow-providers-segment |                         -apache-airflow-providers-sendgrid | apache-airflow-providers-sftp |                         -apache-airflow-providers-singularity | apache-airflow-providers-slack |                     -apache-airflow-providers-smtp | apache-airflow-providers-snowflake |                        -apache-airflow-providers-sqlite | apache-airflow-providers-ssh |                            -apache-airflow-providers-tableau | apache-airflow-providers-tabular |                       -apache-airflow-providers-telegram | apache-airflow-providers-trino |                        -apache-airflow-providers-vertica | apache-airflow-providers-zendesk | docker-stack |        -helm-chart)                                                                                 ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] ---builderBuildx builder used to perform `docker buildx build` commands.(TEXT)[default: autodetect] -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Doc flags ──────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--docs-only-dOnly build documentation. +--spellcheck-only-sOnly run spell checking. +--clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx artifacts     +before the build - useful for a clean build.                                                +--one-pass-onlyBuilds documentation in one pass only. This is useful for debugging sphinx errors. +--package-filterList of packages to consider.                                                               
+(apache-airflow | apache-airflow-providers-airbyte | apache-airflow-providers-alibaba |     +apache-airflow-providers-amazon | apache-airflow-providers-apache-beam |                    +apache-airflow-providers-apache-cassandra | apache-airflow-providers-apache-drill |         +apache-airflow-providers-apache-druid | apache-airflow-providers-apache-flink |             +apache-airflow-providers-apache-hdfs | apache-airflow-providers-apache-hive |               +apache-airflow-providers-apache-impala | apache-airflow-providers-apache-kafka |            +apache-airflow-providers-apache-kylin | apache-airflow-providers-apache-livy |              +apache-airflow-providers-apache-pig | apache-airflow-providers-apache-pinot |               +apache-airflow-providers-apache-spark | apache-airflow-providers-apache-sqoop |             +apache-airflow-providers-apprise | apache-airflow-providers-arangodb |                      +apache-airflow-providers-asana | apache-airflow-providers-atlassian-jira |                  +apache-airflow-providers-celery | apache-airflow-providers-cloudant |                       +apache-airflow-providers-cncf-kubernetes | apache-airflow-providers-common-sql |            +apache-airflow-providers-daskexecutor | apache-airflow-providers-databricks |               +apache-airflow-providers-datadog | apache-airflow-providers-dbt-cloud |                     +apache-airflow-providers-dingding | apache-airflow-providers-discord |                      +apache-airflow-providers-docker | apache-airflow-providers-elasticsearch |                  +apache-airflow-providers-exasol | apache-airflow-providers-facebook |                       +apache-airflow-providers-ftp | apache-airflow-providers-github |                            +apache-airflow-providers-google | apache-airflow-providers-grpc |                           +apache-airflow-providers-hashicorp | apache-airflow-providers-http |                        +apache-airflow-providers-imap | apache-airflow-providers-influxdb |                         +apache-airflow-providers-jdbc | apache-airflow-providers-jenkins |                          +apache-airflow-providers-microsoft-azure | apache-airflow-providers-microsoft-mssql |       +apache-airflow-providers-microsoft-psrp | apache-airflow-providers-microsoft-winrm |        +apache-airflow-providers-mongo | apache-airflow-providers-mysql |                           +apache-airflow-providers-neo4j | apache-airflow-providers-odbc |                            +apache-airflow-providers-openfaas | apache-airflow-providers-openlineage |                  +apache-airflow-providers-opsgenie | apache-airflow-providers-oracle |                       +apache-airflow-providers-pagerduty | apache-airflow-providers-papermill |                   +apache-airflow-providers-plexus | apache-airflow-providers-postgres |                       +apache-airflow-providers-presto | apache-airflow-providers-qubole |                         +apache-airflow-providers-redis | apache-airflow-providers-salesforce |                      +apache-airflow-providers-samba | apache-airflow-providers-segment |                         +apache-airflow-providers-sendgrid | apache-airflow-providers-sftp |                         +apache-airflow-providers-singularity | apache-airflow-providers-slack |                     +apache-airflow-providers-smtp | apache-airflow-providers-snowflake |                        +apache-airflow-providers-sqlite | apache-airflow-providers-ssh |                            
+apache-airflow-providers-tableau | apache-airflow-providers-tabular |                       +apache-airflow-providers-telegram | apache-airflow-providers-trino |                        +apache-airflow-providers-vertica | apache-airflow-providers-zendesk | docker-stack |        +helm-chart)                                                                                 +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +--builderBuildx builder used to perform `docker buildx build` commands.(TEXT)[default: autodetect] +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_ci-image.svg b/images/breeze/output_ci-image.svg index bf1df9347a16..3025e6e470c3 100644 --- a/images/breeze/output_ci-image.svg +++ b/images/breeze/output_ci-image.svg @@ -35,8 +35,8 @@ .breeze-ci-image-r1 { fill: #c5c8c6;font-weight: bold } .breeze-ci-image-r2 { fill: #c5c8c6 } .breeze-ci-image-r3 { fill: #d0b344;font-weight: bold } -.breeze-ci-image-r4 { fill: #68a0b3;font-weight: bold } -.breeze-ci-image-r5 { fill: #868887 } +.breeze-ci-image-r4 { fill: #868887 } +.breeze-ci-image-r5 { fill: #68a0b3;font-weight: bold } .breeze-ci-image-r6 { fill: #98a84b;font-weight: bold } @@ -93,18 +93,18 @@ -Usage: breeze ci-image [OPTIONSCOMMAND [ARGS]... +Usage: breeze ci-image [OPTIONS] COMMAND [ARGS]... -Tools that developers can use to manually manage CI images +Tools that developers can use to manually manage CI images -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ CI Image tools ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ -build   Build CI image. Include building multiple images for all python versions.                                  -pull    Pull and optionally verify CI images - possibly in parallel for all Python versions.                       -verify  Verify CI image.                                                                                           -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ CI Image tools ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +build   Build CI image. Include building multiple images for all python versions.                                  +pull    Pull and optionally verify CI images - possibly in parallel for all Python versions.                       +verify  Verify CI image.                                                   
                                        +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_ci-image_build.svg b/images/breeze/output_ci-image_build.svg index 1418e4c43c76..2f6e1d26a790 100644 --- a/images/breeze/output_ci-image_build.svg +++ b/images/breeze/output_ci-image_build.svg @@ -1,4 +1,4 @@ - + - + @@ -291,9 +291,12 @@ + + + - Command: ci-image build + Command: ci-image build @@ -304,88 +307,89 @@ -Usage: breeze ci-image build [OPTIONS] +Usage: breeze ci-image build [OPTIONS] -Build CI image. Include building multiple images for all python versions. +Build CI image. Include building multiple images for all python versions. -╭─ Basic usage ────────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---python-pPython major/minor version used in Airflow image for images. -(>3.8< | 3.9 | 3.10 | 3.11)                                  -[default: 3.8]                                               ---upgrade-to-newer-dependencies-uWhen set, upgrade all PIP packages to latest. ---upgrade-on-failureWhen set, attempt to run upgrade to newer dependencies when regular build       -fails.                                                                          ---image-tag-tTag the image after building it.(TEXT)[default: latest] ---tag-as-latestTags the image as latest and update checksum of all files after pulling. Useful -when you build or pull image with --image-tag.                                  ---docker-cache-cCache option for image used during the build.(registry | local | disabled) -[default: registry]                           ---force-buildForce image build no matter if it is determined as needed. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Building images in parallel ────────────────────────────────────────────────────────────────────────────────────────╮ ---run-in-parallelRun the operation in parallel on all or selected subset of Python versions. ---parallelismMaximum number of processes to use while running the operation in parallel. -(INTEGER RANGE)                                                             -[default: 4; 1<=x<=8]                                                       ---python-versionsSpace separated list of python versions used for build with multiple versions.(TEXT) -[default: 3.8 3.9 3.10 3.11]                                                   ---skip-cleanupSkip cleanup of temporary files created during parallel run. ---debug-resourcesWhether to show resource information while running in parallel. ---include-success-outputsWhether to include outputs of successful parallel runs (skipped by default). -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Advanced options (for power users) ─────────────────────────────────────────────────────────────────────────────────╮ ---install-providers-from-sourcesInstall providers from sources when installing. ---airflow-constraints-locationIf specified, it is used instead of calculating reference to the constraint      -file. It could be full remote URL to the location file, or local file placed in  -`docker-context-files` (in this case it has to start with                        -/opt/airflow/docker-context-files).                                              
[regenerated help screenshot, continued: Basic usage, Building images in parallel, Advanced options (for power users), Backtracking options, Preparing cache and push, Github authentication and Common options panels for the CI image build command]
diff --git a/images/breeze/output_k8s.svg b/images/breeze/output_k8s.svg
index 5ba36f4e5299..b5d8fb3fda19 100644
[regenerated help screenshot: "breeze k8s" command group (K8S cluster management, inspection and testing commands)]
diff --git a/images/breeze/output_k8s_deploy-airflow.svg b/images/breeze/output_k8s_deploy-airflow.svg
index 694ab621a786..26dfc064b6a0 100644
[regenerated help screenshot: "breeze k8s deploy-airflow"]
diff --git a/images/breeze/output_k8s_run-complete-tests.svg b/images/breeze/output_k8s_run-complete-tests.svg
index 82db183c2b64..56f69599187b 100644
[regenerated help screenshot: "breeze k8s run-complete-tests"]
diff --git a/images/breeze/output_prod-image_build.svg b/images/breeze/output_prod-image_build.svg
index eefdc8f4ccc0..1424dc9ee3cc 100644
[regenerated help screenshot: "breeze prod-image build" - the Basic usage panel now lists --build-progress (and no longer shows --upgrade-to-newer-dependencies / --upgrade-on-failure), and the customization options gain --use-constraints-for-context-packages]
diff --git a/images/breeze/output_release-management.svg b/images/breeze/output_release-management.svg
index fad73508b9b5..ea4a611c4a57 100644
[regenerated help screenshot: "breeze release-management" command group (Airflow, providers and other release commands)]
diff --git a/images/breeze/output_release-management_add-back-references.svg b/images/breeze/output_release-management_add-back-references.svg
index 6b627938723e..b8ead97cde42 100644
[regenerated help screenshot: "breeze release-management add-back-references"]
diff --git a/images/breeze/output_release-management_install-provider-packages.svg b/images/breeze/output_release-management_install-provider-packages.svg
index 46d7b8041d6a..ace5c58fe8b1 100644
[regenerated help screenshot: "breeze release-management install-provider-packages"]
+(INTEGER RANGE)                                                             +[default: 4; 1<=x<=8]                                                       +--skip-cleanupSkip cleanup of temporary files created during parallel run. +--include-success-outputsWhether to include outputs of successful parallel runs (skipped by default). +--debug-resourcesWhether to show resource information while running in parallel. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_release-management_update-constraints.svg b/images/breeze/output_release-management_update-constraints.svg index 175e79211354..d58ab18d6208 100644 --- a/images/breeze/output_release-management_update-constraints.svg +++ b/images/breeze/output_release-management_update-constraints.svg @@ -35,9 +35,9 @@ .breeze-release-management-update-constraints-r1 { fill: #c5c8c6;font-weight: bold } .breeze-release-management-update-constraints-r2 { fill: #c5c8c6 } .breeze-release-management-update-constraints-r3 { fill: #d0b344;font-weight: bold } -.breeze-release-management-update-constraints-r4 { fill: #68a0b3;font-weight: bold } -.breeze-release-management-update-constraints-r5 { fill: #868887 } -.breeze-release-management-update-constraints-r6 { fill: #cc555a } +.breeze-release-management-update-constraints-r4 { fill: #868887 } +.breeze-release-management-update-constraints-r5 { fill: #cc555a } +.breeze-release-management-update-constraints-r6 { fill: #68a0b3;font-weight: bold } .breeze-release-management-update-constraints-r7 { fill: #8d7b39 } .breeze-release-management-update-constraints-r8 { fill: #8a4346 } .breeze-release-management-update-constraints-r9 { fill: #98a84b;font-weight: bold } @@ -123,35 +123,27 @@ -Usage: breeze release-management update-constraints [OPTIONS] +Usage: breeze release-management update-constraints [OPTIONS] Update released constraints with manual changes. -╭─ Update constraints flags ───────────────────────────────────────────────────────────────────────────────────────────╮ -*--constraints-repoPath where airflow repository is checked out, with ``constraints-main`` branch checked out. -(DIRECTORY)                                                                                 -[required]                                                                                  -*--commit-messageCommit message to use for the constraints update.(TEXT)[required] ---remote-nameName of the remote to push the changes to.(TEXT) -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Selection criteria ─────────────────────────────────────────────────────────────────────────────────────────────────╮ -*--airflow-versionsComma separated list of Airflow versions to update constraints for.(TEXT) -[required]                                                          ---airflow-constraints-modeLimit constraint update to only selected constraint mode - if selected. 
diff --git a/images/breeze/output_release-management_verify-provider-packages.svg b/images/breeze/output_release-management_verify-provider-packages.svg
index 6f3670106d6a..9b80ed0cdb02 100644
--- a/images/breeze/output_release-management_verify-provider-packages.svg
+++ b/images/breeze/output_release-management_verify-provider-packages.svg
[generated SVG diff for the `breeze release-management verify-provider-packages` help screenshot: colour style classes reassigned; the rendered help text (Provider verification flags and Common options panels) is the same in both versions]
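An illustrative run of the command shown above, checking provider packages previously built into the dist folder (flag values taken from the documented choices):

    # verify provider packages built as wheels into ./dist
    breeze release-management verify-provider-packages --use-packages-from-dist --package-format wheel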
diff --git a/images/breeze/output_sbom.svg b/images/breeze/output_sbom.svg
index 205f0fa24269..0f42e678c179 100644
--- a/images/breeze/output_sbom.svg
+++ b/images/breeze/output_sbom.svg
[generated SVG diff for the `breeze sbom` help screenshot: colour style classes reassigned; the rendered help text (Common options and SBOM commands panels) is the same in both versions]
diff --git a/images/breeze/output_sbom_generate-provider-requirements.svg b/images/breeze/output_sbom_generate-provider-requirements.svg
index 66b8c6d01a09..9dc8ae664aa2 100644
--- a/images/breeze/output_sbom_generate-provider-requirements.svg
+++ b/images/breeze/output_sbom_generate-provider-requirements.svg
[generated SVG diff for the `breeze sbom generate-provider-requirements` help screenshot: colour style classes reassigned; the rendered help text, including the --provider-id choice list (which already contains openlineage), is the same in both versions]
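An illustrative invocation for the provider relevant to this change (the Airflow version here is a placeholder):

    # generate SBOM requirements for the openlineage provider
    breeze sbom generate-provider-requirements --provider-id openlineage --airflow-version 2.7.0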
diff --git a/images/breeze/output_setup.svg b/images/breeze/output_setup.svg
index 3698b389cb23..566cad3ff843 100644
--- a/images/breeze/output_setup.svg
+++ b/images/breeze/output_setup.svg
[generated SVG diff for the `breeze setup` help screenshot: colour style classes reassigned; the rendered help text (Common options, Setup and Commands panels) is the same in both versions]
diff --git a/images/breeze/output_setup_config.svg b/images/breeze/output_setup_config.svg
index 43fcb8bc983a..dd2cec4eaace 100644
--- a/images/breeze/output_setup_config.svg
+++ b/images/breeze/output_setup_config.svg
[generated SVG diff for the `breeze setup config` help screenshot: colour style classes reassigned; the rendered help text (Config flags and Common options panels) is the same in both versions]
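An illustrative use of the command shown above, switching the default backend and Python version (values are placeholders picked from the documented choices):

    # set Postgres backend, Python 3.10 and disable the cheatsheet
    breeze setup config --backend postgres --python 3.10 --no-cheatsheet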
diff --git a/images/breeze/output_shell.svg b/images/breeze/output_shell.svg
index a8dd2ecd2eda..56d61fafbcc3 100644
--- a/images/breeze/output_shell.svg
+++ b/images/breeze/output_shell.svg
[generated SVG diff for the `breeze shell` help screenshot: colour style classes reassigned; the rendered help text (Basic flags, Choosing executor, Building image, Mounting sources, Installing packages and Common options panels) is the same in both versions]
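An illustrative shell invocation using flags from the help above (backend and Python version are placeholders):

    # enter breeze with Postgres backend, Python 3.10, resetting the DB on entry
    breeze shell --backend postgres --python 3.10 --db-reset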
diff --git a/images/breeze/output_start-airflow.svg b/images/breeze/output_start-airflow.svg
index 0773980eef13..6fe6745726f2 100644
--- a/images/breeze/output_start-airflow.svg
+++ b/images/breeze/output_start-airflow.svg
[generated SVG diff for the `breeze start-airflow` help screenshot: colour style classes reassigned; the rendered help text (Basic flags, Choosing executor, Asset compilation, Building image, Mounting sources, Installing packages and Common options panels) is the same in both versions]
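An illustrative invocation using flags from the help above (backend and executor values are placeholders chosen from the documented choices):

    # start all Airflow components in tmux with example DAGs and Celery
    breeze start-airflow --backend postgres --load-example-dags --executor CeleryExecutor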
diff --git a/images/breeze/output_static-checks.svg b/images/breeze/output_static-checks.svg
index 1a19d59b1bfc..083d5f668118 100644
--- a/images/breeze/output_static-checks.svg
+++ b/images/breeze/output_static-checks.svg
[generated SVG diff for the `breeze static-checks` help screenshot: colour style classes reassigned; the rendered help text, including the full --type check list, is the same in both versions]
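An illustrative run limited to one of the checks listed above, scoped to the files of the last commit:

    # run only the update-extras pre-commit check on the last commit
    breeze static-checks --type update-extras --last-commit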
diff --git a/images/breeze/output_testing.svg b/images/breeze/output_testing.svg
index 82c6f8e344e3..7d14280b5273 100644
--- a/images/breeze/output_testing.svg
+++ b/images/breeze/output_testing.svg
[generated SVG diff for the `breeze testing` help screenshot: colour style classes reassigned; the rendered help text (Common options and Testing panels) is the same in both versions]
Tools that developers can use to run tests -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Testing ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -tests                                    Run the specified unit test targets.                                      -integration-tests                        Run the specified integration tests.                                      -helm-tests                               Run Helm chart tests.                                                     -docker-compose-tests                     Run docker-compose tests.                                                 -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Testing ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +tests                                    Run the specified unit test targets.                                      +integration-tests                        Run the specified integration tests.                                      +helm-tests                               Run Helm chart tests.                                                     +docker-compose-tests                     Run docker-compose tests.                                                 +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_testing_integration-tests.svg b/images/breeze/output_testing_integration-tests.svg index be4d37e6e6c2..757f8c5dc613 100644 --- a/images/breeze/output_testing_integration-tests.svg +++ b/images/breeze/output_testing_integration-tests.svg @@ -35,8 +35,8 @@ .breeze-testing-integration-tests-r1 { fill: #c5c8c6;font-weight: bold } .breeze-testing-integration-tests-r2 { fill: #c5c8c6 } .breeze-testing-integration-tests-r3 { fill: #d0b344;font-weight: bold } -.breeze-testing-integration-tests-r4 { fill: #68a0b3;font-weight: bold } -.breeze-testing-integration-tests-r5 { fill: #868887 } +.breeze-testing-integration-tests-r4 { fill: #868887 } +.breeze-testing-integration-tests-r5 { fill: #68a0b3;font-weight: bold } .breeze-testing-integration-tests-r6 { fill: #8d7b39 } .breeze-testing-integration-tests-r7 { fill: #98a84b;font-weight: bold } @@ -160,40 +160,40 @@ -Usage: breeze testing integration-tests [OPTIONS] [EXTRA_PYTEST_ARGS]... +Usage: breeze testing integration-tests [OPTIONS] [EXTRA_PYTEST_ARGS]... Run the specified integration tests. -╭─ Basic flag for integration tests command ───────────────────────────────────────────────────────────────────────────╮ ---integrationIntegration(s) to enable when running (can be more than one).                               -(all | all-testable | cassandra | celery | kafka | kerberos | mongo | otel | pinot | statsd -| statsd | trino)                                                                           ---test-timeoutTest timeout. 
Set the pytest setup, execution and teardown timeouts to this value -(INTEGER RANGE)                                                                   -[default: 60; x>=0]                                                               ---db-reset-dReset DB when entering the container. ---backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite] ---python-pPython major/minor version used in Airflow image for images.(>3.8< | 3.9 | 3.10 | 3.11) -[default: 3.8]                                               ---postgres-version-PVersion of Postgres used.(>11< | 12 | 13 | 14 | 15)[default: 11] ---mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7] ---mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest] ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Advanced flag for integration tests command ────────────────────────────────────────────────────────────────────────╮ ---image-tag-tTag of the image which is used to run the image (implies --mount-sources=skip).(TEXT) -[default: latest]                                                               ---mount-sourcesChoose scope of local sources that should be mounted, skipped, or removed (default =      -selected).                                                                                -(selected | all | skip | remove)                                                          -[default: selected]                                                                       ---skip-provider-testsSkip provider tests -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Basic flag for integration tests command ───────────────────────────────────────────────────────────────────────────╮ +--integrationIntegration(s) to enable when running (can be more than one).                               +(all | all-testable | cassandra | celery | kafka | kerberos | mongo | otel | pinot | statsd +| statsd | trino)                                                                           +--test-timeoutTest timeout. Set the pytest setup, execution and teardown timeouts to this value +(INTEGER RANGE)                                                                   +[default: 60; x>=0]                                                               +--db-reset-dReset DB when entering the container. 
+--backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite] +--python-pPython major/minor version used in Airflow image for images.(>3.8< | 3.9 | 3.10 | 3.11) +[default: 3.8]                                               +--postgres-version-PVersion of Postgres used.(>11< | 12 | 13 | 14 | 15)[default: 11] +--mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7] +--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest] +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Advanced flag for integration tests command ────────────────────────────────────────────────────────────────────────╮ +--image-tag-tTag of the image which is used to run the image (implies --mount-sources=skip).(TEXT) +[default: latest]                                                               +--mount-sourcesChoose scope of local sources that should be mounted, skipped, or removed (default =      +selected).                                                                                +(selected | all | skip | remove)                                                          +[default: selected]                                                                       +--skip-provider-testsSkip provider tests +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_testing_tests.svg b/images/breeze/output_testing_tests.svg index ec3da499fa7f..9cdc350da969 100644 --- a/images/breeze/output_testing_tests.svg +++ b/images/breeze/output_testing_tests.svg @@ -35,8 +35,8 @@ .breeze-testing-tests-r1 { fill: #c5c8c6;font-weight: bold } .breeze-testing-tests-r2 { fill: #c5c8c6 } .breeze-testing-tests-r3 { fill: #d0b344;font-weight: bold } -.breeze-testing-tests-r4 { fill: #68a0b3;font-weight: bold } -.breeze-testing-tests-r5 { fill: #868887 } +.breeze-testing-tests-r4 { fill: #868887 } +.breeze-testing-tests-r5 { fill: #68a0b3;font-weight: bold } .breeze-testing-tests-r6 { fill: #98a84b;font-weight: bold } .breeze-testing-tests-r7 { fill: #8d7b39 } @@ -232,64 +232,64 @@ -Usage: breeze testing tests [OPTIONS] [EXTRA_PYTEST_ARGS]... +Usage: breeze testing tests [OPTIONS] [EXTRA_PYTEST_ARGS]... Run the specified unit test targets. -╭─ Basic flag for tests command ───────────────────────────────────────────────────────────────────────────────────────╮ ---test-typeType of test to run. With Providers, you can specify tests of which providers should be     -run: `Providers[airbyte,http]` or excluded from the full test suite:                        -`Providers[-amazon,google]`                                                                 -(All | API | Always | CLI | Core | Other | Providers | WWW | PlainAsserts | Postgres |      -MySQL | Quarantine)                                                                         ---test-timeoutTest timeout. 
Set the pytest setup, execution and teardown timeouts to this value -(INTEGER RANGE)                                                                   -[default: 60; x>=0]                                                               ---collect-onlyCollect tests only, do not run them. ---db-reset-dReset DB when entering the container. ---backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite] ---python-pPython major/minor version used in Airflow image for images.(>3.8< | 3.9 | 3.10 | 3.11) -[default: 3.8]                                               ---postgres-version-PVersion of Postgres used.(>11< | 12 | 13 | 14 | 15)[default: 11] ---mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7] ---mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest] ---integrationIntegration(s) to enable when running (can be more than one).                               -(all | all-testable | cassandra | celery | kafka | kerberos | mongo | otel | pinot | statsd -| statsd | trino)                                                                           ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options for parallel test commands ─────────────────────────────────────────────────────────────────────────────────╮ ---run-in-parallelRun the operation in parallel on all or selected subset of Python versions. ---parallelismMaximum number of processes to use while running the operation in parallel. -(INTEGER RANGE)                                                             -[default: 4; 1<=x<=8]                                                       ---parallel-test-typesSpace separated list of test types used for testing in parallel.(TEXT) -[default: API Always CLI Core Other Providers WWW PlainAsserts]  ---skip-cleanupSkip cleanup of temporary files created during parallel run. ---debug-resourcesWhether to show resource information while running in parallel. ---include-success-outputsWhether to include outputs of successful parallel runs (skipped by default). -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Advanced flag for tests command ────────────────────────────────────────────────────────────────────────────────────╮ ---image-tag-tTag of the image which is used to run the image (implies --mount-sources=skip). -(TEXT)                                                                          -[default: latest]                                                               ---use-airflow-versionUse (reinstall at entry) Airflow version from PyPI. It can also be `none`, `wheel`,  -or `sdist` if Airflow should be removed, installed from wheel packages or sdist      -packages available in dist folder respectively. Implies --mount-sources `remove`.    -(none | wheel | sdist | <airflow_version>)                                           ---mount-sourcesChoose scope of local sources that should be mounted, skipped, or removed (default = -selected).                                                                           -(selected | all | skip | remove)                                                     -[default: selected]                                                                  ---upgrade-botoRemove aiobotocore and upgrade botocore and boto to the latest version. 
---remove-arm-packagesRemoves arm packages from the image to test if ARM collection works ---skip-docker-compose-downSkips running docker-compose down after tests -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Basic flag for tests command ───────────────────────────────────────────────────────────────────────────────────────╮ +--test-typeType of test to run. With Providers, you can specify tests of which providers should be     +run: `Providers[airbyte,http]` or excluded from the full test suite:                        +`Providers[-amazon,google]`                                                                 +(All | API | Always | CLI | Core | Other | Providers | WWW | PlainAsserts | Postgres |      +MySQL | Quarantine)                                                                         +--test-timeoutTest timeout. Set the pytest setup, execution and teardown timeouts to this value +(INTEGER RANGE)                                                                   +[default: 60; x>=0]                                                               +--collect-onlyCollect tests only, do not run them. +--db-reset-dReset DB when entering the container. +--backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite] +--python-pPython major/minor version used in Airflow image for images.(>3.8< | 3.9 | 3.10 | 3.11) +[default: 3.8]                                               +--postgres-version-PVersion of Postgres used.(>11< | 12 | 13 | 14 | 15)[default: 11] +--mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7] +--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest] +--integrationIntegration(s) to enable when running (can be more than one).                               +(all | all-testable | cassandra | celery | kafka | kerberos | mongo | otel | pinot | statsd +| statsd | trino)                                                                           +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options for parallel test commands ─────────────────────────────────────────────────────────────────────────────────╮ +--run-in-parallelRun the operation in parallel on all or selected subset of Python versions. +--parallelismMaximum number of processes to use while running the operation in parallel. +(INTEGER RANGE)                                                             +[default: 4; 1<=x<=8]                                                       +--parallel-test-typesSpace separated list of test types used for testing in parallel.(TEXT) +[default: API Always CLI Core Other Providers WWW PlainAsserts]  +--skip-cleanupSkip cleanup of temporary files created during parallel run. +--debug-resourcesWhether to show resource information while running in parallel. +--include-success-outputsWhether to include outputs of successful parallel runs (skipped by default). 
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Advanced flag for tests command ────────────────────────────────────────────────────────────────────────────────────╮ +--image-tag-tTag of the image which is used to run the image (implies --mount-sources=skip). +(TEXT)                                                                          +[default: latest]                                                               +--use-airflow-versionUse (reinstall at entry) Airflow version from PyPI. It can also be `none`, `wheel`,  +or `sdist` if Airflow should be removed, installed from wheel packages or sdist      +packages available in dist folder respectively. Implies --mount-sources `remove`.    +(none | wheel | sdist | <airflow_version>)                                           +--mount-sourcesChoose scope of local sources that should be mounted, skipped, or removed (default = +selected).                                                                           +(selected | all | skip | remove)                                                     +[default: selected]                                                                  +--upgrade-botoRemove aiobotocore and upgrade botocore and boto to the latest version. +--remove-arm-packagesRemoves arm packages from the image to test if ARM collection works +--skip-docker-compose-downSkips running docker-compose down after tests +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--help-hShow this message and exit. 
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/scripts/docker/install_from_docker_context_files.sh b/scripts/docker/install_from_docker_context_files.sh index fa08f47626f8..cdb66875ac46 100644 --- a/scripts/docker/install_from_docker_context_files.sh +++ b/scripts/docker/install_from_docker_context_files.sh @@ -74,17 +74,42 @@ function install_airflow_and_providers_from_docker_context_files(){ return fi - echo - echo "${COLOR_BLUE}Force re-installing airflow and providers from local files with eager upgrade${COLOR_RESET}" - echo - # force reinstall all airflow + provider package local files with eager upgrade - set -x - pip install "${pip_flags[@]}" --root-user-action ignore --upgrade --upgrade-strategy eager \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} \ - ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} - set +x + if [[ ${USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES=} == "true" ]]; then + local python_version + python_version=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') + local local_constraints_file=/docker-context-files/constraints-"${python_version}"/${AIRFLOW_CONSTRAINTS_MODE}-"${python_version}".txt + if [[ -f "${local_constraints_file}" ]]; then + echo + echo "${COLOR_BLUE}Installing docker-context-files packages with constraints found in ${local_constraints_file}${COLOR_RESET}" + echo + # force reinstall all airflow + provider packages with constraints found in + set -x + pip install "${pip_flags[@]}" --root-user-action ignore --upgrade \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} --constraint "${local_constraints_file}" \ + ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} + set +x + else + echo + echo "${COLOR_BLUE}Installing docker-context-files packages with constraints from GitHub${COLOR_RESET}" + echo + set -x + pip install "${pip_flags[@]}" --root-user-action ignore \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" \ + ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} + set +x + fi + else + echo + echo "${COLOR_BLUE}Installing docker-context-files packages without constraints${COLOR_RESET}" + echo + set -x + pip install "${pip_flags[@]}" --root-user-action ignore \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} + set +x + fi common::install_pip_version pip check } From 529346b6ada41f39a0a14d5dedd597b08b7034bb Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sun, 27 Aug 2023 05:56:47 +0200 Subject: [PATCH 012/117] Bump airflow version to 2.7.1 --- airflow/__init__.py | 2 +- airflow/utils/db.py | 1 + docs/docker-stack/README.md | 10 +++++----- .../extending/add-airflow-configuration/Dockerfile | 2 +- .../extending/add-apt-packages/Dockerfile | 2 +- .../add-build-essential-extend/Dockerfile | 2 +- .../extending/add-providers/Dockerfile | 2 +- .../extending/add-pypi-packages/Dockerfile | 2 +- .../extending/add-requirement-packages/Dockerfile | 2 +- .../extending/custom-providers/Dockerfile | 2 +- .../extending/embedding-dags/Dockerfile | 2 +- .../extending/writable-directory/Dockerfile | 2 +- docs/docker-stack/entrypoint.rst | 14 +++++++------- 13 files changed, 23 insertions(+), 22 deletions(-) diff --git a/airflow/__init__.py b/airflow/__init__.py index f60d27db07e6..cd5ec8b7ac38 100644 --- 
a/airflow/__init__.py +++ b/airflow/__init__.py @@ -26,7 +26,7 @@ """ from __future__ import annotations -__version__ = "2.7.0" +__version__ = "2.7.1" # flake8: noqa: F401 diff --git a/airflow/utils/db.py b/airflow/utils/db.py index 68425f8a4cb3..db49e37dca5a 100644 --- a/airflow/utils/db.py +++ b/airflow/utils/db.py @@ -100,6 +100,7 @@ "2.6.2": "c804e5c76e3e", "2.6.3": "c804e5c76e3e", "2.7.0": "788397e78828", + "2.7.1": "788397e78828", } diff --git a/docs/docker-stack/README.md b/docs/docker-stack/README.md index 1ad2bd30327e..3e1115d92aab 100644 --- a/docs/docker-stack/README.md +++ b/docs/docker-stack/README.md @@ -31,12 +31,12 @@ Every time a new version of Airflow is released, the images are prepared in the [apache/airflow DockerHub](https://hub.docker.com/r/apache/airflow) for all the supported Python versions. -You can find the following images there (Assuming Airflow version `2.7.0`): +You can find the following images there (Assuming Airflow version `2.7.1`): * `apache/airflow:latest` - the latest released Airflow image with default Python version (3.8 currently) * `apache/airflow:latest-pythonX.Y` - the latest released Airflow image with specific Python version -* `apache/airflow:2.7.0` - the versioned Airflow image with default Python version (3.8 currently) -* `apache/airflow:2.7.0-pythonX.Y` - the versioned Airflow image with specific Python version +* `apache/airflow:2.7.1` - the versioned Airflow image with default Python version (3.8 currently) +* `apache/airflow:2.7.1-pythonX.Y` - the versioned Airflow image with specific Python version Those are "reference" regular images. They contain the most common set of extras, dependencies and providers that are often used by the users and they are good to "try-things-out" when you want to just take Airflow for a spin, @@ -47,8 +47,8 @@ via [Building the image](https://airflow.apache.org/docs/docker-stack/build.html * `apache/airflow:slim-latest` - the latest released Airflow image with default Python version (3.8 currently) * `apache/airflow:slim-latest-pythonX.Y` - the latest released Airflow image with specific Python version -* `apache/airflow:slim-2.7.0` - the versioned Airflow image with default Python version (3.8 currently) -* `apache/airflow:slim-2.7.0-pythonX.Y` - the versioned Airflow image with specific Python version +* `apache/airflow:slim-2.7.1` - the versioned Airflow image with default Python version (3.8 currently) +* `apache/airflow:slim-2.7.1-pythonX.Y` - the versioned Airflow image with specific Python version The Apache Airflow image provided as convenience package is optimized for size, and it provides just a bare minimal set of the extras and dependencies installed and in most cases diff --git a/docs/docker-stack/docker-examples/extending/add-airflow-configuration/Dockerfile b/docs/docker-stack/docker-examples/extending/add-airflow-configuration/Dockerfile index bd54cbad0257..f37f16b9619f 100644 --- a/docs/docker-stack/docker-examples/extending/add-airflow-configuration/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-airflow-configuration/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. 
It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.7.0 +FROM apache/airflow:2.7.1 ENV AIRFLOW__CORE__LOAD_EXAMPLES=True ENV AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=my_conn_string # [END Dockerfile] diff --git a/docs/docker-stack/docker-examples/extending/add-apt-packages/Dockerfile b/docs/docker-stack/docker-examples/extending/add-apt-packages/Dockerfile index 1e8fb8990577..6a034764360d 100644 --- a/docs/docker-stack/docker-examples/extending/add-apt-packages/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-apt-packages/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.7.0 +FROM apache/airflow:2.7.1 USER root RUN apt-get update \ && apt-get install -y --no-install-recommends \ diff --git a/docs/docker-stack/docker-examples/extending/add-build-essential-extend/Dockerfile b/docs/docker-stack/docker-examples/extending/add-build-essential-extend/Dockerfile index 5e34806903e5..fc7f3a74e313 100644 --- a/docs/docker-stack/docker-examples/extending/add-build-essential-extend/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-build-essential-extend/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.7.0 +FROM apache/airflow:2.7.1 USER root RUN apt-get update \ && apt-get install -y --no-install-recommends \ diff --git a/docs/docker-stack/docker-examples/extending/add-providers/Dockerfile b/docs/docker-stack/docker-examples/extending/add-providers/Dockerfile index 83641efdfb58..94870a3019d1 100644 --- a/docs/docker-stack/docker-examples/extending/add-providers/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-providers/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.7.0 +FROM apache/airflow:2.7.1 USER root RUN apt-get update \ && apt-get install -y --no-install-recommends \ diff --git a/docs/docker-stack/docker-examples/extending/add-pypi-packages/Dockerfile b/docs/docker-stack/docker-examples/extending/add-pypi-packages/Dockerfile index cc1dd277cb45..7cca44228e42 100644 --- a/docs/docker-stack/docker-examples/extending/add-pypi-packages/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-pypi-packages/Dockerfile @@ -15,6 +15,6 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.7.0 +FROM apache/airflow:2.7.1 RUN pip install --no-cache-dir "apache-airflow==${AIRFLOW_VERSION}" lxml # [END Dockerfile] diff --git a/docs/docker-stack/docker-examples/extending/add-requirement-packages/Dockerfile b/docs/docker-stack/docker-examples/extending/add-requirement-packages/Dockerfile index ec3773d91632..525296b91e24 100644 --- a/docs/docker-stack/docker-examples/extending/add-requirement-packages/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/add-requirement-packages/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. 
It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.7.0 +FROM apache/airflow:2.7.1 COPY requirements.txt / RUN pip install --no-cache-dir "apache-airflow==${AIRFLOW_VERSION}" -r /requirements.txt # [END Dockerfile] diff --git a/docs/docker-stack/docker-examples/extending/custom-providers/Dockerfile b/docs/docker-stack/docker-examples/extending/custom-providers/Dockerfile index fc895abcc5fc..0b7f2eca7e23 100644 --- a/docs/docker-stack/docker-examples/extending/custom-providers/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/custom-providers/Dockerfile @@ -15,6 +15,6 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.7.0 +FROM apache/airflow:2.7.1 RUN pip install "apache-airflow==${AIRFLOW_VERSION}" --no-cache-dir apache-airflow-providers-docker==2.5.1 # [END Dockerfile] diff --git a/docs/docker-stack/docker-examples/extending/embedding-dags/Dockerfile b/docs/docker-stack/docker-examples/extending/embedding-dags/Dockerfile index 842d84d939c6..bce131ee7375 100644 --- a/docs/docker-stack/docker-examples/extending/embedding-dags/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/embedding-dags/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.7.0 +FROM apache/airflow:2.7.1 COPY --chown=airflow:root test_dag.py /opt/airflow/dags diff --git a/docs/docker-stack/docker-examples/extending/writable-directory/Dockerfile b/docs/docker-stack/docker-examples/extending/writable-directory/Dockerfile index 4f7f80ecf49a..5721fa4ba453 100644 --- a/docs/docker-stack/docker-examples/extending/writable-directory/Dockerfile +++ b/docs/docker-stack/docker-examples/extending/writable-directory/Dockerfile @@ -15,7 +15,7 @@ # This is an example Dockerfile. It is not intended for PRODUCTION use # [START Dockerfile] -FROM apache/airflow:2.7.0 +FROM apache/airflow:2.7.1 RUN umask 0002; \ mkdir -p ~/writeable-directory # [END Dockerfile] diff --git a/docs/docker-stack/entrypoint.rst b/docs/docker-stack/entrypoint.rst index 077c6f2bd00f..71cb438faca9 100644 --- a/docs/docker-stack/entrypoint.rst +++ b/docs/docker-stack/entrypoint.rst @@ -132,7 +132,7 @@ if you specify extra arguments. For example: .. code-block:: bash - docker run -it apache/airflow:2.7.0-python3.8 bash -c "ls -la" + docker run -it apache/airflow:2.7.1-python3.8 bash -c "ls -la" total 16 drwxr-xr-x 4 airflow root 4096 Jun 5 18:12 . drwxr-xr-x 1 root root 4096 Jun 5 18:12 .. @@ -144,7 +144,7 @@ you pass extra parameters. For example: .. code-block:: bash - > docker run -it apache/airflow:2.7.0-python3.8 python -c "print('test')" + > docker run -it apache/airflow:2.7.1-python3.8 python -c "print('test')" test If first argument equals to "airflow" - the rest of the arguments is treated as an airflow command @@ -152,13 +152,13 @@ to execute. Example: .. code-block:: bash - docker run -it apache/airflow:2.7.0-python3.8 airflow webserver + docker run -it apache/airflow:2.7.1-python3.8 airflow webserver If there are any other arguments - they are simply passed to the "airflow" command .. code-block:: bash - > docker run -it apache/airflow:2.7.0-python3.8 help + > docker run -it apache/airflow:2.7.1-python3.8 help usage: airflow [-h] GROUP_OR_COMMAND ... 
positional arguments: @@ -363,7 +363,7 @@ database and creating an ``admin/admin`` Admin user with the following command: --env "_AIRFLOW_DB_MIGRATE=true" \ --env "_AIRFLOW_WWW_USER_CREATE=true" \ --env "_AIRFLOW_WWW_USER_PASSWORD=admin" \ - apache/airflow:2.7.0-python3.8 webserver + apache/airflow:2.7.1-python3.8 webserver .. code-block:: bash @@ -372,7 +372,7 @@ database and creating an ``admin/admin`` Admin user with the following command: --env "_AIRFLOW_DB_MIGRATE=true" \ --env "_AIRFLOW_WWW_USER_CREATE=true" \ --env "_AIRFLOW_WWW_USER_PASSWORD_CMD=echo admin" \ - apache/airflow:2.7.0-python3.8 webserver + apache/airflow:2.7.1-python3.8 webserver The commands above perform initialization of the SQLite database, create admin user with admin password and Admin role. They also forward local port ``8080`` to the webserver port and finally start the webserver. @@ -412,6 +412,6 @@ Example: --env "_AIRFLOW_DB_MIGRATE=true" \ --env "_AIRFLOW_WWW_USER_CREATE=true" \ --env "_AIRFLOW_WWW_USER_PASSWORD_CMD=echo admin" \ - apache/airflow:2.7.0-python3.8 webserver + apache/airflow:2.7.1-python3.8 webserver This method is only available starting from Docker image of Airflow 2.1.1 and above. From b3bb92a461b2d72d859b49c84468ed6f8c230340 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sun, 27 Aug 2023 17:12:20 +0200 Subject: [PATCH 013/117] Use PyPI constraints for PROD image in non-main branch (#33789) When we are building PROD image in CI for non main branch, we are installing providers from PyPI rather than building them locally from sources. Therefore we should use `PyPI` constraints for such builds not the "source" constraints (they might differ). This PR adds two steps: * In the CI build, when we do not build providers we generate PyPI constraints additionally to source constraints * In the PROD build we use the PyPI constraints in case we do not build providers locally (cherry picked from commit f9276f0597d5ad91c98d0db36d16d799ee2d4f21) --- .github/actions/build-ci-images/action.yml | 8 +++++++- .github/actions/build-prod-images/action.yml | 12 +++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/.github/actions/build-ci-images/action.yml b/.github/actions/build-ci-images/action.yml index d43a42528449..415819008c45 100644 --- a/.github/actions/build-ci-images/action.yml +++ b/.github/actions/build-ci-images/action.yml @@ -34,12 +34,18 @@ runs: - name: "Build & Push AMD64 CI images ${{ env.IMAGE_TAG }} ${{ env.PYTHON_VERSIONS }}" shell: bash run: breeze ci-image build --push --tag-as-latest --run-in-parallel --upgrade-on-failure - - name: "Show dependencies to be upgraded" + - name: "Generate source constraints" shell: bash run: > breeze release-management generate-constraints --run-in-parallel --airflow-constraints-mode constraints-source-providers if: env.UPGRADE_TO_NEWER_DEPENDENCIES != 'false' + - name: "Generate PyPI constraints" + shell: bash + run: > + breeze release-management generate-constraints --run-in-parallel + --airflow-constraints-mode constraints + if: env.UPGRADE_TO_NEWER_DEPENDENCIES != 'false' and ${{ inputs.build-provider-packages != 'true' }} - name: "Print dependency upgrade summary" shell: bash run: | diff --git a/.github/actions/build-prod-images/action.yml b/.github/actions/build-prod-images/action.yml index feac8c2ef270..1223b5a7f1ba 100644 --- a/.github/actions/build-prod-images/action.yml +++ b/.github/actions/build-prod-images/action.yml @@ -62,7 +62,7 @@ runs: name: constraints path: ./docker-context-files if: 
env.UPGRADE_TO_NEWER_DEPENDENCIES != 'false' - - name: "Build & Push PROD images ${{ env.IMAGE_TAG }}:${{ env.PYTHON_VERSIONS }}" + - name: "Build & Push PROD images with source providers ${{ env.IMAGE_TAG }}:${{ env.PYTHON_VERSIONS }}" shell: bash run: > breeze prod-image build --tag-as-latest --run-in-parallel --push @@ -70,6 +70,16 @@ runs: --use-constraints-for-context-packages env: COMMIT_SHA: ${{ github.sha }} + if: ${{ inputs.build-provider-packages == 'true' }} + - name: "Build & Push PROD images with PyPi providers ${{ env.IMAGE_TAG }}:${{ env.PYTHON_VERSIONS }}" + shell: bash + run: > + breeze prod-image build --tag-as-latest --run-in-parallel --push + --install-packages-from-context --airflow-constraints-mode constraints + --use-constraints-for-context-packages + env: + COMMIT_SHA: ${{ github.sha }} + if: ${{ inputs.build-provider-packages != 'true' }} - name: "Fix ownership" shell: bash run: breeze ci fix-ownership From 22d09d30dfb4e9775e969ac78a882607e1c088dd Mon Sep 17 00:00:00 2001 From: Hussein Awala Date: Fri, 18 Aug 2023 21:17:07 +0200 Subject: [PATCH 014/117] Fix rendering the mapped parameters when using `expand_kwargs` method (#32272) * Fix rendering the mapped parameters in the mapped operator Signed-off-by: Hussein Awala * add template_in_template arg to expand method to tell Airflow whether to resolve the xcom data or not * fix dag serialization tests * Revert "fix dag serialization tests" This reverts commit 191351cda7b51bc6d49e7fcee5ab8ccd6cd219f5. * Revert "add template_in_template arg to expand method to tell Airflow whether to resolve the xcom data or not" This reverts commit 14bd392c7e5c6f25ce25d61dfd440e9b27c1bc2e. * Fix ListOfDictsExpandInput resolve method * remove _iter_parse_time_resolved_kwargs method * remove unnecessary step --------- Signed-off-by: Hussein Awala (cherry picked from commit d1e6a5c48d03322dda090113134f745d1f9c34d4) --- airflow/models/expandinput.py | 5 +- tests/models/test_mappedoperator.py | 89 +++++++++++++++++++++++++---- 2 files changed, 81 insertions(+), 13 deletions(-) diff --git a/airflow/models/expandinput.py b/airflow/models/expandinput.py index 36fb5f41650a..a9128568d37d 100644 --- a/airflow/models/expandinput.py +++ b/airflow/models/expandinput.py @@ -265,7 +265,10 @@ def resolve(self, context: Context, session: Session) -> tuple[Mapping[str, Any] f"expand_kwargs() input dict keys must all be str, " f"but {key!r} is of type {_describe_type(key)}" ) - return mapping, {id(v) for v in mapping.values()} + # filter out parse time resolved values from the resolved_oids + resolved_oids = {id(v) for k, v in mapping.items() if not _is_parse_time_mappable(v)} + + return mapping, resolved_oids EXPAND_INPUT_EMPTY = DictOfListsExpandInput({}) # Sentinel value. 
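As a minimal sketch of the behaviour the new tests below exercise (the dag_id and bash commands are illustrative, not taken from the patch): after this change, literal template strings passed through expand_kwargs() are rendered when the mapped task runs, mirroring the test assertion that "{{ ds }}" resolves to "2016-01-01".

# Hedged illustration only -- not part of the patch. Assumes the stock BashOperator.
from __future__ import annotations

import pendulum

from airflow import DAG
from airflow.operators.bash import BashOperator

with DAG(
    dag_id="example_expand_kwargs_templating",  # illustrative name
    start_date=pendulum.datetime(2016, 1, 1, tz="UTC"),
    schedule=None,
):
    # Each dict below is a parse-time literal; the fix ensures such values still go
    # through template rendering for each mapped task instance.
    BashOperator.partial(task_id="echo_ds").expand_kwargs(
        [{"bash_command": "echo {{ ds }}"}, {"bash_command": "echo static"}]
    )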
diff --git a/tests/models/test_mappedoperator.py b/tests/models/test_mappedoperator.py index a7f6d0660c76..6d4a2fbca5cf 100644 --- a/tests/models/test_mappedoperator.py +++ b/tests/models/test_mappedoperator.py @@ -20,6 +20,7 @@ import logging from collections import defaultdict from datetime import timedelta +from unittest import mock from unittest.mock import patch import pendulum @@ -399,17 +400,31 @@ def test_mapped_expand_against_params(dag_maker, dag_params, task_params, expect def test_mapped_render_template_fields_validating_operator(dag_maker, session): - class MyOperator(MockOperator): - def __init__(self, value, arg1, **kwargs): - assert isinstance(value, str), "value should have been resolved before unmapping" - assert isinstance(arg1, str), "value should have been resolved before unmapping" - super().__init__(arg1=arg1, **kwargs) - self.value = value + class MyOperator(BaseOperator): + template_fields = ("partial_template", "map_template", "file_template") + template_ext = (".ext",) + + def __init__( + self, partial_template, partial_static, map_template, map_static, file_template, **kwargs + ): + for value in [partial_template, partial_static, map_template, map_static, file_template]: + assert isinstance(value, str), "value should have been resolved before unmapping" + super().__init__(**kwargs) + self.partial_template = partial_template + self.partial_static = partial_static + self.map_template = map_template + self.map_static = map_static + self.file_template = file_template + + def execute(self, context): + pass with dag_maker(session=session): task1 = BaseOperator(task_id="op1") output1 = task1.output - mapped = MyOperator.partial(task_id="a", arg2="{{ ti.task_id }}").expand(value=output1, arg1=output1) + mapped = MyOperator.partial( + task_id="a", partial_template="{{ ti.task_id }}", partial_static="{{ ti.task_id }}" + ).expand(map_template=output1, map_static=output1, file_template=["/path/to/file.ext"]) dr = dag_maker.create_dagrun() ti: TaskInstance = dr.get_task_instance(task1.task_id, session=session) @@ -432,12 +447,62 @@ def __init__(self, value, arg1, **kwargs): mapped_ti.map_index = 0 assert isinstance(mapped_ti.task, MappedOperator) - mapped.render_template_fields(context=mapped_ti.get_template_context(session=session)) + with patch("builtins.open", mock.mock_open(read_data=b"loaded data")), patch( + "os.path.isfile", return_value=True + ), patch("os.path.getmtime", return_value=0): + mapped.render_template_fields(context=mapped_ti.get_template_context(session=session)) + assert isinstance(mapped_ti.task, MyOperator) + + assert mapped_ti.task.partial_template == "a", "Should be templated!" + assert mapped_ti.task.partial_static == "{{ ti.task_id }}", "Should not be templated!" + assert mapped_ti.task.map_template == "{{ ds }}", "Should not be templated!" + assert mapped_ti.task.map_static == "{{ ds }}", "Should not be templated!" + assert mapped_ti.task.file_template == "loaded data", "Should be templated!" 
+ + +def test_mapped_expand_kwargs_render_template_fields_validating_operator(dag_maker, session): + class MyOperator(BaseOperator): + template_fields = ("partial_template", "map_template", "file_template") + template_ext = (".ext",) + + def __init__( + self, partial_template, partial_static, map_template, map_static, file_template, **kwargs + ): + for value in [partial_template, partial_static, map_template, map_static, file_template]: + assert isinstance(value, str), "value should have been resolved before unmapping" + super().__init__(**kwargs) + self.partial_template = partial_template + self.partial_static = partial_static + self.map_template = map_template + self.map_static = map_static + self.file_template = file_template + + def execute(self, context): + pass + + with dag_maker(session=session): + mapped = MyOperator.partial( + task_id="a", partial_template="{{ ti.task_id }}", partial_static="{{ ti.task_id }}" + ).expand_kwargs( + [{"map_template": "{{ ds }}", "map_static": "{{ ds }}", "file_template": "/path/to/file.ext"}] + ) + + dr = dag_maker.create_dagrun() + + mapped_ti: TaskInstance = dr.get_task_instance(mapped.task_id, session=session, map_index=0) + + assert isinstance(mapped_ti.task, MappedOperator) + with patch("builtins.open", mock.mock_open(read_data=b"loaded data")), patch( + "os.path.isfile", return_value=True + ), patch("os.path.getmtime", return_value=0): + mapped.render_template_fields(context=mapped_ti.get_template_context(session=session)) assert isinstance(mapped_ti.task, MyOperator) - assert mapped_ti.task.value == "{{ ds }}", "Should not be templated!" - assert mapped_ti.task.arg1 == "{{ ds }}", "Should not be templated!" - assert mapped_ti.task.arg2 == "a" + assert mapped_ti.task.partial_template == "a", "Should be templated!" + assert mapped_ti.task.partial_static == "{{ ti.task_id }}", "Should not be templated!" + assert mapped_ti.task.map_template == "2016-01-01", "Should be templated!" + assert mapped_ti.task.map_static == "{{ ds }}", "Should not be templated!" + assert mapped_ti.task.file_template == "loaded data", "Should be templated!" def test_mapped_render_nested_template_fields(dag_maker, session): @@ -534,7 +599,7 @@ def test_expand_kwargs_mapped_task_instance(dag_maker, session, num_existing_tis @pytest.mark.parametrize( "map_index, expected", [ - pytest.param(0, "{{ ds }}", id="0"), + pytest.param(0, "2016-01-01", id="0"), pytest.param(1, 2, id="1"), ], ) From 56ec8d78059bbe9fdf36f5584c4867a0d776f6e2 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Thu, 24 Aug 2023 11:52:43 +0200 Subject: [PATCH 015/117] Bring back hive support for Python 3.11 (#32607) Update airflow/providers/apache/hive/CHANGELOG.rst Co-authored-by: Tzu-ping Chung (cherry picked from commit 08188f80f4bb6c225eaf4f58e2742c982a230652) --- airflow/providers/apache/hive/CHANGELOG.rst | 1 + airflow/providers/apache/hive/provider.yaml | 11 +---------- generated/provider_dependencies.json | 7 ++----- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/airflow/providers/apache/hive/CHANGELOG.rst b/airflow/providers/apache/hive/CHANGELOG.rst index b761d8dc8cf2..a69abae6fe66 100644 --- a/airflow/providers/apache/hive/CHANGELOG.rst +++ b/airflow/providers/apache/hive/CHANGELOG.rst @@ -41,6 +41,7 @@ Misc * ``Add more accurate typing for DbApiHook.run method (#31846)`` * ``Move Hive configuration to Apache Hive provider (#32777)`` + 6.1.2 ..... 
diff --git a/airflow/providers/apache/hive/provider.yaml b/airflow/providers/apache/hive/provider.yaml index 78c2b7f486e4..04a36b7041fc 100644 --- a/airflow/providers/apache/hive/provider.yaml +++ b/airflow/providers/apache/hive/provider.yaml @@ -59,18 +59,9 @@ dependencies: - apache-airflow-providers-common-sql>=1.3.1 - hmsclient>=0.1.0 - pandas>=0.17.1 - - pyhive[hive]>=0.6.0 - # in case of Python 3.9 sasl library needs to be installed with version higher or equal than - # 0.3.1 because only that version supports Python 3.9. For other Python version pyhive[hive] pulls - # the sasl library anyway (and there sasl library version is not relevant) - - sasl>=0.3.1; python_version>="3.9" + - pyhive[hive_pure_sasl]>=0.7.0 - thrift>=0.9.2 -# Excluded because python-sasl is not yet compatible -# with 3.11. See https://github.com/cloudera/python-sasl/issues/30 -excluded-python-versions: - - "3.11" - integrations: - integration-name: Apache Hive external-doc-url: https://hive.apache.org/ diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index e2b1513f7e7e..e0aed4414fd6 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -119,8 +119,7 @@ "apache-airflow>=2.4.0", "hmsclient>=0.1.0", "pandas>=0.17.1", - "pyhive[hive]>=0.6.0", - "sasl>=0.3.1; python_version>=\"3.9\"", + "pyhive[hive_pure_sasl]>=0.7.0", "thrift>=0.9.2" ], "cross-providers-deps": [ @@ -132,9 +131,7 @@ "samba", "vertica" ], - "excluded-python-versions": [ - "3.11" - ] + "excluded-python-versions": [] }, "apache.impala": { "deps": [ From fc77886fd526f63c661e8a24c08b25cf1c234db3 Mon Sep 17 00:00:00 2001 From: Dan Hansen Date: Mon, 14 Aug 2023 03:51:18 -0700 Subject: [PATCH 016/117] [Models] [Postgres] Check if the dynamically-added index is in the table schema before adding (#32731) * Check if the index is in the table schema before adding * add pre-condition assertion * static checks * Update test_models.py * integrate upstream auth manager changes (cherry picked from commit 2950fd768541fc902d8f7218e4243e8d83414c51) --- airflow/auth/managers/fab/models/__init__.py | 8 ++- tests/auth/managers/fab/test_models.py | 62 ++++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 tests/auth/managers/fab/test_models.py diff --git a/airflow/auth/managers/fab/models/__init__.py b/airflow/auth/managers/fab/models/__init__.py index cb11e8fb06a2..0bc26adb7eb4 100644 --- a/airflow/auth/managers/fab/models/__init__.py +++ b/airflow/auth/managers/fab/models/__init__.py @@ -255,11 +255,15 @@ class RegisterUser(Model): def add_index_on_ab_user_username_postgres(table, conn, **kw): if conn.dialect.name != "postgresql": return - table.indexes.add(Index("idx_ab_user_username", func.lower(table.c.username), unique=True)) + index_name = "idx_ab_user_username" + if not any(table_index.name == index_name for table_index in table.indexes): + table.indexes.add(Index(index_name, func.lower(table.c.username), unique=True)) @event.listens_for(RegisterUser.__table__, "before_create") def add_index_on_ab_register_user_username_postgres(table, conn, **kw): if conn.dialect.name != "postgresql": return - table.indexes.add(Index("idx_ab_register_user_username", func.lower(table.c.username), unique=True)) + index_name = "idx_ab_register_user_username" + if not any(table_index.name == index_name for table_index in table.indexes): + table.indexes.add(Index(index_name, func.lower(table.c.username), unique=True)) diff --git a/tests/auth/managers/fab/test_models.py 
b/tests/auth/managers/fab/test_models.py new file mode 100644 index 000000000000..f2703e8d66e4 --- /dev/null +++ b/tests/auth/managers/fab/test_models.py @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from unittest import mock + +from sqlalchemy import Column, MetaData, String, Table + +from airflow.auth.managers.fab.models import ( + add_index_on_ab_register_user_username_postgres, + add_index_on_ab_user_username_postgres, +) + +_mock_conn = mock.MagicMock() +_mock_conn.dialect = mock.MagicMock() +_mock_conn.dialect.name = "postgresql" + + +def test_add_index_on_ab_user_username_postgres(): + table = Table("test_table", MetaData(), Column("username", String)) + + assert len(table.indexes) == 0 + + add_index_on_ab_user_username_postgres(table, _mock_conn) + + # Assert that the index was added to the table + assert len(table.indexes) == 1 + + add_index_on_ab_user_username_postgres(table, _mock_conn) + + # Assert that index is not re-added when the schema is recreated + assert len(table.indexes) == 1 + + +def test_add_index_on_ab_register_user_username_postgres(): + table = Table("test_table", MetaData(), Column("username", String)) + + assert len(table.indexes) == 0 + + add_index_on_ab_register_user_username_postgres(table, _mock_conn) + + # Assert that the index was added to the table + assert len(table.indexes) == 1 + + add_index_on_ab_register_user_username_postgres(table, _mock_conn) + + # Assert that index is not re-added when the schema is recreated + assert len(table.indexes) == 1 From 34aef4be89c6083285137580808691254e56131f Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Tue, 15 Aug 2023 18:35:16 +0100 Subject: [PATCH 017/117] Fix getting correct commit from multiple referenced PR (#33411) When a PR is referenced by other PRs, our dev tool for getting the correct commit lists the latest commit when looking for the commmit sha but we should get the oldest. 
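The idea, expressed as a minimal GitPython sketch (the helper name is illustrative; the real change is in the diff below), is to list matching commits oldest-first and take the first SHA:

# Hedged sketch only -- assumes GitPython, which dev/airflow-github already uses.
from __future__ import annotations

import git


def oldest_commit_referencing_pr(repo: git.Repo, pr_number: int) -> str | None:
    # --reverse makes `git log` print matches oldest-first, so the first line is
    # the oldest commit on origin/main whose message mentions the PR number.
    output = repo.git.log("--reverse", f"--grep=#{pr_number}", "origin/main", "--format=%H")
    return output.splitlines()[0] if output else None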
(cherry picked from commit 5b104a9020510c75f1d5a7a211fd0c7cb1b45070) --- dev/airflow-github | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dev/airflow-github b/dev/airflow-github index 115ed48d1916..69ca8ec53cc0 100755 --- a/dev/airflow-github +++ b/dev/airflow-github @@ -83,9 +83,10 @@ def get_issue_type(issue): def get_commit_in_main_associated_with_pr(repo: git.Repo, issue: Issue) -> str | None: """For a PR, find the associated merged commit & return its SHA""" if issue.pull_request: - commit = repo.git.log(f"--grep=#{issue.number}", "origin/main", "--format=%H") + commit = repo.git.log("--reverse", f"--grep=#{issue.number}", "origin/main", "--format=%H") if commit: - return commit + # We only want the oldest commit that referenced this PR number + return commit.splitlines()[0] else: pr: PullRequest = issue.as_pull_request() if pr.is_merged(): From 04e94bf2b43d87c595fb193fd6bc794390f7aa41 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Tue, 15 Aug 2023 21:54:52 +0200 Subject: [PATCH 018/117] More complete fix for dev release scripts to filter commit for PR (#33418) This is a more complete fix to #33411. This is also a follow up on earlier implementation of #33261 that addressed checking if PRs are merged. This one applies the same pattern to finding commit but also improves it by checking if the (#NNNNNN) ends the subject - so even if the PR is in the same form in the message, it will be filtered out. The previous "--reverse" quick fix in #33411 had potential of problem in case there were releated PRs merged before the original PR (which is quite posssible when you have a series of PRs referring to each other. (cherry picked from commit 3766ab07d7061e946329443ed9a073ad3b41f457) --- dev/airflow-github | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/dev/airflow-github b/dev/airflow-github index 69ca8ec53cc0..8109521e4ab6 100755 --- a/dev/airflow-github +++ b/dev/airflow-github @@ -83,27 +83,31 @@ def get_issue_type(issue): def get_commit_in_main_associated_with_pr(repo: git.Repo, issue: Issue) -> str | None: """For a PR, find the associated merged commit & return its SHA""" if issue.pull_request: - commit = repo.git.log("--reverse", f"--grep=#{issue.number}", "origin/main", "--format=%H") - if commit: - # We only want the oldest commit that referenced this PR number - return commit.splitlines()[0] + log_output = repo.git.log(f"--grep=(#{issue.number})$", "origin/main", "--format=%H %s") + if log_output: + for commit_line in log_output.splitlines(): + # We only want the commit for the PR where squash-merge added (#PR) at the end of subject + if commit_line and commit_line.endswith(f"(#{issue.number})"): + return commit_line.split(" ")[0] + return None else: pr: PullRequest = issue.as_pull_request() if pr.is_merged(): - commit = pr.merge_commit_sha - return commit + return pr.merge_commit_sha return None def is_cherrypicked(repo: git.Repo, issue: Issue, previous_version: str | None = None) -> bool: """Check if a given issue is cherry-picked in the current branch or not""" - log_args = ["--format=%H", f"--grep=(#{issue.number})$"] + log_args = ["--format=%H %s", f"--grep=(#{issue.number})$"] if previous_version: log_args.append(previous_version + "..") - log = repo.git.log(*log_args) + log_output = repo.git.log(*log_args) - if log: - return True + for commit_line in log_output.splitlines(): + # We only want the commit for the PR where squash-merge added (#PR) at the end of subject + if commit_line and 
commit_line.endswith(f"(#{issue.number})"): + return True return False From aab0f4b4afeb22fd99d76dbd304c1b8fd552e2f6 Mon Sep 17 00:00:00 2001 From: Jens Scheffler <95105677+jens-scheffler-bosch@users.noreply.github.com> Date: Wed, 16 Aug 2023 08:04:52 +0200 Subject: [PATCH 019/117] Fix handling of default value and serialization of Param class (#33141) (cherry picked from commit 489ca1494691f6a0b717f9312e0ab8a2a7f76d96) --- airflow/models/param.py | 9 ++++-- airflow/serialization/enums.py | 1 + airflow/serialization/serialized_objects.py | 5 +++ tests/models/test_param.py | 31 +++++++++++++++++++ tests/serialization/test_dag_serialization.py | 7 +++-- 5 files changed, 48 insertions(+), 5 deletions(-) diff --git a/airflow/models/param.py b/airflow/models/param.py index 5bb1db3d4412..f4ae8067de50 100644 --- a/airflow/models/param.py +++ b/airflow/models/param.py @@ -138,13 +138,18 @@ def resolve(self, value: Any = NOTSET, suppress_exception: bool = False) -> Any: def dump(self) -> dict: """Dump the Param as a dictionary.""" - out_dict = {self.CLASS_IDENTIFIER: f"{self.__module__}.{self.__class__.__name__}"} + out_dict: dict[str, str | None] = { + self.CLASS_IDENTIFIER: f"{self.__module__}.{self.__class__.__name__}" + } out_dict.update(self.__dict__) + # Ensure that not set is translated to None + if self.value is NOTSET: + out_dict["value"] = None return out_dict @property def has_value(self) -> bool: - return self.value is not NOTSET + return self.value is not NOTSET and self.value is not None def serialize(self) -> dict: return {"value": self.value, "description": self.description, "schema": self.schema} diff --git a/airflow/serialization/enums.py b/airflow/serialization/enums.py index c83d9f53ef87..0b1c0ca009c3 100644 --- a/airflow/serialization/enums.py +++ b/airflow/serialization/enums.py @@ -55,3 +55,4 @@ class DagAttributeTypes(str, Enum): TASK_INSTANCE = "task_instance" DAG_RUN = "dag_run" DATA_SET = "data_set" + ARG_NOT_SET = "arg_not_set" diff --git a/airflow/serialization/serialized_objects.py b/airflow/serialization/serialized_objects.py index d89f2e22d464..aa8c5230d605 100644 --- a/airflow/serialization/serialized_objects.py +++ b/airflow/serialization/serialized_objects.py @@ -65,6 +65,7 @@ from airflow.utils.module_loading import import_string, qualname from airflow.utils.operator_resources import Resources from airflow.utils.task_group import MappedTaskGroup, TaskGroup +from airflow.utils.types import NOTSET, ArgNotSet if TYPE_CHECKING: from airflow.ti_deps.deps.base_ti_dep import BaseTIDep @@ -489,6 +490,8 @@ def serialize( return cls._encode(DatasetPydantic.from_orm(var).dict(), type_=DAT.DATA_SET) else: return cls.default_serialization(strict, var) + elif isinstance(var, ArgNotSet): + return cls._encode(None, type_=DAT.ARG_NOT_SET) else: return cls.default_serialization(strict, var) @@ -561,6 +564,8 @@ def deserialize(cls, encoded_var: Any, use_pydantic_models=False) -> Any: return DagRunPydantic.parse_obj(var) elif type_ == DAT.DATA_SET: return DatasetPydantic.parse_obj(var) + elif type_ == DAT.ARG_NOT_SET: + return NOTSET else: raise TypeError(f"Invalid type {type_!s} in deserialization.") diff --git a/tests/models/test_param.py b/tests/models/test_param.py index 4053cf657171..b73cfea15f4e 100644 --- a/tests/models/test_param.py +++ b/tests/models/test_param.py @@ -23,6 +23,7 @@ from airflow.decorators import task from airflow.exceptions import ParamValidationError, RemovedInAirflow3Warning from airflow.models.param import Param, ParamsDict +from 
airflow.serialization.serialized_objects import BaseSerialization from airflow.utils import timezone from airflow.utils.types import DagRunType from tests.test_utils.db import clear_db_dags, clear_db_runs, clear_db_xcom @@ -41,14 +42,20 @@ def test_null_param(self): with pytest.raises(ParamValidationError, match="No value passed and Param has no default value"): p.resolve() assert p.resolve(None) is None + assert p.dump()["value"] is None + assert not p.has_value p = Param(None) assert p.resolve() is None assert p.resolve(None) is None + assert p.dump()["value"] is None + assert not p.has_value p = Param(None, type="null") assert p.resolve() is None assert p.resolve(None) is None + assert p.dump()["value"] is None + assert not p.has_value with pytest.raises(ParamValidationError): p.resolve("test") @@ -222,6 +229,30 @@ def test_dump(self): assert dump["description"] == "world" assert dump["schema"] == {"type": "string", "minLength": 2} + @pytest.mark.parametrize( + "param", + [ + Param("my value", description="hello", schema={"type": "string"}), + Param("my value", description="hello"), + Param(None, description=None), + Param([True], type="array", items={"type": "boolean"}), + Param(), + ], + ) + def test_param_serialization(self, param: Param): + """ + Test to make sure that native Param objects can be correctly serialized + """ + + serializer = BaseSerialization() + serialized_param = serializer.serialize(param) + restored_param: Param = serializer.deserialize(serialized_param) + + assert restored_param.value == param.value + assert isinstance(restored_param, Param) + assert restored_param.description == param.description + assert restored_param.schema == param.schema + class TestParamsDict: def test_params_dict(self): diff --git a/tests/serialization/test_dag_serialization.py b/tests/serialization/test_dag_serialization.py index 301e54a0ec99..4616bd30d2a4 100644 --- a/tests/serialization/test_dag_serialization.py +++ b/tests/serialization/test_dag_serialization.py @@ -899,20 +899,21 @@ def __init__(self, path: str): Param("my value", description="hello"), Param(None, description=None), Param([True], type="array", items={"type": "boolean"}), + Param(), ], ) - def test_full_param_roundtrip(self, param): + def test_full_param_roundtrip(self, param: Param): """ Test to make sure that only native Param objects are being passed as dag or task params """ - dag = DAG(dag_id="simple_dag", params={"my_param": param}) + dag = DAG(dag_id="simple_dag", schedule=None, params={"my_param": param}) serialized_json = SerializedDAG.to_json(dag) serialized = json.loads(serialized_json) SerializedDAG.validate_schema(serialized) dag = SerializedDAG.from_dict(serialized) - assert dag.params["my_param"] == param.value + assert dag.params.get_param("my_param").value == param.value observed_param = dag.params.get_param("my_param") assert isinstance(observed_param, Param) assert observed_param.description == param.description From ee20c88aa384c01a7b7300748f2466fdf2051f7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Mon, 14 Aug 2023 06:46:38 +0000 Subject: [PATCH 020/117] Refactor: Simplify code in configuration.py (#33160) (cherry picked from commit 21656fcfe84b2e749b6b7bc9a3da4826c1af522f) --- airflow/configuration.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/airflow/configuration.py b/airflow/configuration.py index 57e8e5087d9e..213f34423064 100644 --- a/airflow/configuration.py +++ 
b/airflow/configuration.py @@ -19,6 +19,7 @@ import datetime import functools import io +import itertools as it import json import logging import multiprocessing @@ -172,9 +173,8 @@ def retrieve_configuration_description( from airflow.providers_manager import ProvidersManager for provider, config in ProvidersManager().provider_configs: - if selected_provider and provider != selected_provider: - continue - base_configuration_description.update(config) + if not selected_provider or provider == selected_provider: + base_configuration_description.update(config) return base_configuration_description @@ -473,13 +473,7 @@ def get_sections_including_defaults(self) -> list[str]: :return: list of section names """ - my_own_sections = self.sections() - - all_sections_from_defaults = list(self.configuration_description.keys()) - for section in my_own_sections: - if section not in all_sections_from_defaults: - all_sections_from_defaults.append(section) - return all_sections_from_defaults + return list(dict.fromkeys(it.chain(self.configuration_description, self.sections()))) def get_options_including_defaults(self, section: str) -> list[str]: """ @@ -489,13 +483,8 @@ def get_options_including_defaults(self, section: str) -> list[str]: :return: list of option names for the section given """ my_own_options = self.options(section) if self.has_section(section) else [] - all_options_from_defaults = list( - self.configuration_description.get(section, {}).get("options", {}).keys() - ) - for option in my_own_options: - if option not in all_options_from_defaults: - all_options_from_defaults.append(option) - return all_options_from_defaults + all_options_from_defaults = self.configuration_description.get(section, {}).get("options", {}) + return list(dict.fromkeys(it.chain(all_options_from_defaults, my_own_options))) def optionxform(self, optionstr: str) -> str: """ @@ -1537,7 +1526,7 @@ def _include_envs( continue if not display_sensitive and env_var != self._env_var_name("core", "unit_test_mode"): # Don't hide cmd/secret values here - if not env_var.lower().endswith("cmd") and not env_var.lower().endswith("secret"): + if not env_var.lower().endswith(("cmd", "secret")): if (section, key) in self.sensitive_config_values: opt = "< hidden >" elif raw: From 11845bb042e4e9928df7762d710d2adc917acab5 Mon Sep 17 00:00:00 2001 From: Wei Lee Date: Mon, 14 Aug 2023 18:05:28 +0800 Subject: [PATCH 021/117] respect soft_fail argument when ExternalTaskSensor runs in deferrable mode (#33196) (cherry picked from commit a1b5bdb25a6f9565ac5934a9a458e9b079ccf3ae) --- airflow/sensors/base.py | 6 +++++ airflow/sensors/external_task.py | 46 ++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/airflow/sensors/base.py b/airflow/sensors/base.py index e217727785a5..e23894a28de1 100644 --- a/airflow/sensors/base.py +++ b/airflow/sensors/base.py @@ -330,6 +330,12 @@ def reschedule(self): def get_serialized_fields(cls): return super().get_serialized_fields() | {"reschedule"} + def raise_failed_or_skiping_exception(self, *, failed_message: str, skipping_message: str = "") -> None: + """Raise AirflowSkipException if self.soft_fail is set to True. 
Otherwise raise AirflowException.""" + if self.soft_fail: + raise AirflowSkipException(skipping_message or failed_message) + raise AirflowException(failed_message) + def poke_mode_only(cls): """ diff --git a/airflow/sensors/external_task.py b/airflow/sensors/external_task.py index 5e42820ffe99..9e48d3e1406e 100644 --- a/airflow/sensors/external_task.py +++ b/airflow/sensors/external_task.py @@ -222,6 +222,8 @@ def __init__( self.deferrable = deferrable self.poll_interval = poll_interval + self._skipping_message_postfix = " Skipping due to soft_fail." + def _get_dttm_filter(self, context): if self.execution_delta: dttm = context["logical_date"] - self.execution_delta @@ -274,32 +276,28 @@ def poke(self, context: Context, session: Session = NEW_SESSION) -> bool: # Fail if anything in the list has failed. if count_failed > 0: if self.external_task_ids: - if self.soft_fail: - raise AirflowSkipException( - f"Some of the external tasks {self.external_task_ids} " - f"in DAG {self.external_dag_id} failed. Skipping due to soft_fail." - ) - raise AirflowException( + failed_message = ( f"Some of the external tasks {self.external_task_ids} " f"in DAG {self.external_dag_id} failed." ) + + self.raise_failed_or_skiping_exception( + failed_message=failed_message, + skipping_message=f"{failed_message}{self._skipping_message_postfix}", + ) elif self.external_task_group_id: - if self.soft_fail: - raise AirflowSkipException( + self.raise_failed_or_skiping_exception( + failed_message=( f"The external task_group '{self.external_task_group_id}' " - f"in DAG '{self.external_dag_id}' failed. Skipping due to soft_fail." + f"in DAG '{self.external_dag_id}' failed." ) - raise AirflowException( - f"The external task_group '{self.external_task_group_id}' " - f"in DAG '{self.external_dag_id}' failed." ) - else: - if self.soft_fail: - raise AirflowSkipException( - f"The external DAG {self.external_dag_id} failed. Skipping due to soft_fail." - ) - raise AirflowException(f"The external DAG {self.external_dag_id} failed.") + failed_message = f"The external DAG {self.external_dag_id} failed." + self.raise_failed_or_skiping_exception( + failed_message=failed_message, + skipping_message=f"{failed_message}{self._skipping_message_postfix}", + ) count_skipped = -1 if self.skipped_states: @@ -354,12 +352,20 @@ def execute_complete(self, context, event=None): self.log.info("External task %s has executed successfully.", self.external_task_id) return None elif event["status"] == "timeout": - raise AirflowException("Dag was not started within 1 minute, assuming fail.") + failed_message = "Dag was not started within 1 minute, assuming fail." + self.raise_failed_or_skiping_exception( + failed_message=failed_message, + skipping_message=f"{failed_message}{self._skipping_message_postfix}", + ) else: - raise AirflowException( + failed_message = ( "Error occurred while trying to retrieve task status. Please, check the " "name of executed task and Dag." ) + self.raise_failed_or_skiping_exception( + failed_message=failed_message, + skipping_message=f"{failed_message}{self._skipping_message_postfix}", + ) def _check_for_existence(self, session) -> None: dag_to_wait = DagModel.get_current(self.external_dag_id, session) From bc9ea437538efb5e826a436059eabcaf74557d8a Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Wed, 9 Aug 2023 10:36:16 +0100 Subject: [PATCH 022/117] Fix pydantic warning about `orm_mode` rename (#33220) * Fix pydantic warning about `orm_mode` rename Pydantic 2 renamed orm_mode to from_attributes. 
This was missed during the upgrade to pydantic 2 and it gives excessive warning about the rename. This PR fixes it * Also rename from_orm to model_validate and use model_dump instead of dict * Fix Pydantic 1.x compatibility --------- Co-authored-by: Tzu-ping Chung (cherry picked from commit 75bb04bb6f37dab4083190e4a3d6436ace1cac31) --- airflow/serialization/pydantic/dag_run.py | 3 ++- airflow/serialization/pydantic/dataset.py | 12 ++++++++---- airflow/serialization/pydantic/job.py | 3 ++- airflow/serialization/pydantic/taskinstance.py | 3 ++- airflow/serialization/serialized_objects.py | 17 +++++++++++++---- 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/airflow/serialization/pydantic/dag_run.py b/airflow/serialization/pydantic/dag_run.py index 1e1528eb3564..834b6164ae5b 100644 --- a/airflow/serialization/pydantic/dag_run.py +++ b/airflow/serialization/pydantic/dag_run.py @@ -47,4 +47,5 @@ class DagRunPydantic(BaseModelPydantic): class Config: """Make sure it deals automatically with SQLAlchemy ORM classes.""" - orm_mode = True + from_attributes = True + orm_mode = True # Pydantic 1.x compatibility. diff --git a/airflow/serialization/pydantic/dataset.py b/airflow/serialization/pydantic/dataset.py index 659e5a1899ca..096bda6ddd5d 100644 --- a/airflow/serialization/pydantic/dataset.py +++ b/airflow/serialization/pydantic/dataset.py @@ -31,7 +31,8 @@ class DagScheduleDatasetReferencePydantic(BaseModelPydantic): class Config: """Make sure it deals automatically with SQLAlchemy ORM classes.""" - orm_mode = True + from_attributes = True + orm_mode = True # Pydantic 1.x compatibility. class TaskOutletDatasetReferencePydantic(BaseModelPydantic): @@ -46,7 +47,8 @@ class TaskOutletDatasetReferencePydantic(BaseModelPydantic): class Config: """Make sure it deals automatically with SQLAlchemy ORM classes.""" - orm_mode = True + from_attributes = True + orm_mode = True # Pydantic 1.x compatibility. class DatasetPydantic(BaseModelPydantic): @@ -65,7 +67,8 @@ class DatasetPydantic(BaseModelPydantic): class Config: """Make sure it deals automatically with SQLAlchemy ORM classes.""" - orm_mode = True + from_attributes = True + orm_mode = True # Pydantic 1.x compatibility. class DatasetEventPydantic(BaseModelPydantic): @@ -83,4 +86,5 @@ class DatasetEventPydantic(BaseModelPydantic): class Config: """Make sure it deals automatically with SQLAlchemy ORM classes.""" - orm_mode = True + from_attributes = True + orm_mode = True # Pydantic 1.x compatibility. diff --git a/airflow/serialization/pydantic/job.py b/airflow/serialization/pydantic/job.py index b36a9826eb25..27c8ad8ca749 100644 --- a/airflow/serialization/pydantic/job.py +++ b/airflow/serialization/pydantic/job.py @@ -49,4 +49,5 @@ class JobPydantic(BaseModelPydantic): class Config: """Make sure it deals automatically with SQLAlchemy ORM classes.""" - orm_mode = True + from_attributes = True + orm_mode = True # Pydantic 1.x compatibility. diff --git a/airflow/serialization/pydantic/taskinstance.py b/airflow/serialization/pydantic/taskinstance.py index 236c42c26061..71d8ba576f11 100644 --- a/airflow/serialization/pydantic/taskinstance.py +++ b/airflow/serialization/pydantic/taskinstance.py @@ -60,7 +60,8 @@ class TaskInstancePydantic(BaseModelPydantic): class Config: """Make sure it deals automatically with SQLAlchemy ORM classes.""" - orm_mode = True + from_attributes = True + orm_mode = True # Pydantic 1.x compatibility. 
def xcom_pull( self, diff --git a/airflow/serialization/serialized_objects.py b/airflow/serialization/serialized_objects.py index aa8c5230d605..86d141f311b2 100644 --- a/airflow/serialization/serialized_objects.py +++ b/airflow/serialization/serialized_objects.py @@ -68,6 +68,8 @@ from airflow.utils.types import NOTSET, ArgNotSet if TYPE_CHECKING: + from pydantic import BaseModel + from airflow.ti_deps.deps.base_ti_dep import BaseTIDep HAS_KUBERNETES: bool @@ -480,14 +482,21 @@ def serialize( type_=DAT.SIMPLE_TASK_INSTANCE, ) elif use_pydantic_models and _ENABLE_AIP_44: + + def _pydantic_model_dump(model_cls: type[BaseModel], var: Any) -> dict[str, Any]: + try: + return model_cls.model_validate(var).model_dump() # type: ignore[attr-defined] + except AttributeError: # Pydantic 1.x compatibility. + return model_cls.from_orm(var).dict() # type: ignore[attr-defined] + if isinstance(var, Job): - return cls._encode(JobPydantic.from_orm(var).dict(), type_=DAT.BASE_JOB) + return cls._encode(_pydantic_model_dump(JobPydantic, var), type_=DAT.BASE_JOB) elif isinstance(var, TaskInstance): - return cls._encode(TaskInstancePydantic.from_orm(var).dict(), type_=DAT.TASK_INSTANCE) + return cls._encode(_pydantic_model_dump(TaskInstancePydantic, var), type_=DAT.TASK_INSTANCE) elif isinstance(var, DagRun): - return cls._encode(DagRunPydantic.from_orm(var).dict(), type_=DAT.DAG_RUN) + return cls._encode(_pydantic_model_dump(DagRunPydantic, var), type_=DAT.DAG_RUN) elif isinstance(var, Dataset): - return cls._encode(DatasetPydantic.from_orm(var).dict(), type_=DAT.DATA_SET) + return cls._encode(_pydantic_model_dump(DatasetPydantic, var), type_=DAT.DATA_SET) else: return cls.default_serialization(strict, var) elif isinstance(var, ArgNotSet): From 84369c62911f1a389d9997fbc8c6e92f5e62a08a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Thu, 10 Aug 2023 07:08:19 +0000 Subject: [PATCH 023/117] Refactor: Simplify dict manipulation in metrics (#33264) (cherry picked from commit 01a6c1e7f8b0ed736d7c6f661e0c29ec023016b2) --- airflow/metrics/otel_logger.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/airflow/metrics/otel_logger.py b/airflow/metrics/otel_logger.py index 3168e9ae65d1..3e4701ca6077 100644 --- a/airflow/metrics/otel_logger.py +++ b/airflow/metrics/otel_logger.py @@ -309,12 +309,9 @@ def get_counter(self, name: str, attributes: Attributes = None): :param attributes: Counter attributes, used to generate a unique key to store the counter. 
""" key = _generate_key_name(name, attributes) - if key in self.map.keys(): - return self.map[key] - else: - new_counter = self._create_counter(name) - self.map[key] = new_counter - return new_counter + if key not in self.map: + self.map[key] = self._create_counter(name) + return self.map[key] def del_counter(self, name: str, attributes: Attributes = None) -> None: """ From d31f20b3c09de10f75178f95a918e0ba1c514ec0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Thu, 10 Aug 2023 22:50:55 +0000 Subject: [PATCH 024/117] Refactor: Simplify code in serialization (#33266) (cherry picked from commit 63c5df89d9a6db7c8f3c8a0a252588b98fb0b1a1) --- airflow/serialization/helpers.py | 3 ++- airflow/serialization/serde.py | 7 +++---- airflow/serialization/serialized_objects.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/airflow/serialization/helpers.py b/airflow/serialization/helpers.py index d8e6afdc0a43..80f949dee355 100644 --- a/airflow/serialization/helpers.py +++ b/airflow/serialization/helpers.py @@ -32,9 +32,10 @@ def serialize_template_field(template_field: Any) -> str | dict | list | int | f def is_jsonable(x): try: json.dumps(x) - return True except (TypeError, OverflowError): return False + else: + return True if not is_jsonable(template_field): return str(template_field) diff --git a/airflow/serialization/serde.py b/airflow/serialization/serde.py index 8d0ed100ac07..a9a09d86db53 100644 --- a/airflow/serialization/serde.py +++ b/airflow/serialization/serde.py @@ -300,14 +300,13 @@ def _stringify(classname: str, version: int, value: T | None) -> str: s = f"{classname}@version={version}(" if isinstance(value, _primitives): - s += f"{value})" + s += f"{value}" elif isinstance(value, _builtin_collections): # deserialized values can be != str s += ",".join(str(deserialize(value, full=False))) elif isinstance(value, dict): - for k, v in value.items(): - s += f"{k}={deserialize(v, full=False)}," - s = s[:-1] + ")" + s += ",".join(f"{k}={deserialize(v, full=False)}" for k, v in value.items()) + s += ")" return s diff --git a/airflow/serialization/serialized_objects.py b/airflow/serialization/serialized_objects.py index 86d141f311b2..8147dbee43a4 100644 --- a/airflow/serialization/serialized_objects.py +++ b/airflow/serialization/serialized_objects.py @@ -1187,7 +1187,7 @@ def _deserialize_operator_extra_links(cls, encoded_op_links: list) -> dict[str, # } # ) - _operator_link_class_path, data = list(_operator_links_source.items())[0] + _operator_link_class_path, data = next(iter(_operator_links_source.items())) if _operator_link_class_path in get_operator_extra_links(): single_op_link_class = import_string(_operator_link_class_path) elif _operator_link_class_path in plugins_manager.registered_operator_link_classes: From 3439f91325fbfceb0179d48464824e277d95181e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Sun, 13 Aug 2023 23:44:34 +0000 Subject: [PATCH 025/117] Refactor: Simplify code in settings (#33267) (cherry picked from commit 95e9d83720584e2c098242dd60aa54f388dae1de) --- airflow/policies.py | 2 +- airflow/settings.py | 39 +++++++++++++++++-------------------- tests/core/test_policies.py | 4 ++-- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/airflow/policies.py b/airflow/policies.py index 175ae5bbf906..47c3dffcb22d 100644 --- a/airflow/policies.py +++ b/airflow/policies.py @@ -138,7 +138,7 @@ def 
get_airflow_context_vars(context): return {} -def make_plugin_from_local_settings(pm: pluggy.PluginManager, module, names: list[str]): +def make_plugin_from_local_settings(pm: pluggy.PluginManager, module, names: set[str]): """ Turn the functions from airflow_local_settings module into a custom/local plugin. diff --git a/airflow/settings.py b/airflow/settings.py index e51cd208d9f7..bdf70ecf27c8 100644 --- a/airflow/settings.py +++ b/airflow/settings.py @@ -465,11 +465,23 @@ def import_local_settings(): """Import airflow_local_settings.py files to allow overriding any configs in settings.py file.""" try: import airflow_local_settings - + except ModuleNotFoundError as e: + if e.name == "airflow_local_settings": + log.debug("No airflow_local_settings to import.", exc_info=True) + else: + log.critical( + "Failed to import airflow_local_settings due to a transitive module not found error.", + exc_info=True, + ) + raise + except ImportError: + log.critical("Failed to import airflow_local_settings.", exc_info=True) + raise + else: if hasattr(airflow_local_settings, "__all__"): - names = list(airflow_local_settings.__all__) + names = set(airflow_local_settings.__all__) else: - names = list(filter(lambda n: not n.startswith("__"), airflow_local_settings.__dict__.keys())) + names = {n for n in airflow_local_settings.__dict__ if not n.startswith("__")} if "policy" in names and "task_policy" not in names: warnings.warn( @@ -485,30 +497,15 @@ def import_local_settings(): POLICY_PLUGIN_MANAGER, airflow_local_settings, names ) - for name in names: - # If we have already handled a function by adding it to the plugin, then don't clobber the global - # function - if name in plugin_functions: - continue - + # If we have already handled a function by adding it to the plugin, + # then don't clobber the global function + for name in names - plugin_functions: globals()[name] = getattr(airflow_local_settings, name) if POLICY_PLUGIN_MANAGER.hook.task_instance_mutation_hook.get_hookimpls(): task_instance_mutation_hook.is_noop = False log.info("Loaded airflow_local_settings from %s .", airflow_local_settings.__file__) - except ModuleNotFoundError as e: - if e.name == "airflow_local_settings": - log.debug("No airflow_local_settings to import.", exc_info=True) - else: - log.critical( - "Failed to import airflow_local_settings due to a transitive module not found error.", - exc_info=True, - ) - raise - except ImportError: - log.critical("Failed to import airflow_local_settings.", exc_info=True) - raise def initialize(): diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py index e5fbf4fe682d..c7caff80b2c0 100644 --- a/tests/core/test_policies.py +++ b/tests/core/test_policies.py @@ -42,7 +42,7 @@ def dag_policy(dag): mod = Namespace(dag_policy=dag_policy) - policies.make_plugin_from_local_settings(plugin_manager, mod, ["dag_policy"]) + policies.make_plugin_from_local_settings(plugin_manager, mod, {"dag_policy"}) plugin_manager.hook.dag_policy(dag="a") @@ -64,7 +64,7 @@ def dag_policy(wrong_arg_name): mod = Namespace(dag_policy=dag_policy) - policies.make_plugin_from_local_settings(plugin_manager, mod, ["dag_policy"]) + policies.make_plugin_from_local_settings(plugin_manager, mod, {"dag_policy"}) plugin_manager.hook.dag_policy(dag="passed_dag_value") From cff420b3c3aef0b2a967b184b9e39e523ec07bd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Thu, 10 Aug 2023 22:54:13 +0000 Subject: [PATCH 026/117] Refactor: Simplify code in utils 
(#33268) (cherry picked from commit cc8519d1bedd665d89834343171ecf62e72d8f2a) --- airflow/utils/file.py | 3 +-- airflow/utils/log/file_task_handler.py | 11 +++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/airflow/utils/file.py b/airflow/utils/file.py index fb17ee6a8d57..2b230b61290b 100644 --- a/airflow/utils/file.py +++ b/airflow/utils/file.py @@ -22,7 +22,6 @@ import logging import os import zipfile -from collections import OrderedDict from pathlib import Path from typing import Generator, NamedTuple, Pattern, Protocol, overload @@ -230,7 +229,7 @@ def _find_path_from_directory( ] # evaluation order of patterns is important with negation # so that later patterns can override earlier patterns - patterns = list(OrderedDict.fromkeys(patterns).keys()) + patterns = list(dict.fromkeys(patterns)) dirs[:] = [subdir for subdir in dirs if not ignore_rule_type.match(Path(root) / subdir, patterns)] diff --git a/airflow/utils/log/file_task_handler.py b/airflow/utils/log/file_task_handler.py index 6c8073b005b6..2a7bd688b5ae 100644 --- a/airflow/utils/log/file_task_handler.py +++ b/airflow/utils/log/file_task_handler.py @@ -491,12 +491,11 @@ def _init_file(self, ti): @staticmethod def _read_from_local(worker_log_path: Path) -> tuple[list[str], list[str]]: messages = [] - logs = [] - files = list(worker_log_path.parent.glob(worker_log_path.name + "*")) - if files: - messages.extend(["Found local files:", *[f" * {x}" for x in sorted(files)]]) - for file in sorted(files): - logs.append(Path(file).read_text()) + paths = sorted(worker_log_path.parent.glob(worker_log_path.name + "*")) + if paths: + messages.append("Found local files:") + messages.extend(f" * {x}" for x in paths) + logs = [file.read_text() for file in paths] return messages, logs def _read_from_logs_server(self, ti, worker_log_rel_path) -> tuple[list[str], list[str]]: From 496f917f1af3792930d73bdf24038a7052f7511f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Thu, 10 Aug 2023 10:48:09 +0000 Subject: [PATCH 027/117] Refactor: Simplify code in www (#33270) (cherry picked from commit 369b9bc947215d396570b06c3a5e7b982f234e76) --- airflow/www/views.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/airflow/www/views.py b/airflow/www/views.py index 59c9bd31a7a3..eee5788275be 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -3264,8 +3264,7 @@ def duration(self, dag_id: str, session: Session = NEW_SESSION): y=scale_time_units(cumulative_y[task_id], cum_y_unit), ) - dates = sorted({ti.execution_date for ti in task_instances}) - max_date = max(ti.execution_date for ti in task_instances) if dates else None + max_date = max((ti.execution_date for ti in task_instances), default=None) session.commit() @@ -3364,8 +3363,7 @@ def tries(self, dag_id: str, session: Session = NEW_SESSION): if x_points: chart.add_serie(name=task.task_id, x=x_points, y=y_points) - tries = sorted({ti.try_number for ti in tis}) - max_date = max(ti.execution_date for ti in tis) if tries else None + max_date = max((ti.execution_date for ti in tis), default=None) chart.create_y_axis("yAxis", format=".02f", custom_format=False, label="Tries") chart.axislist["yAxis"]["axisLabelDistance"] = "-15" @@ -3890,7 +3888,7 @@ def datasets_summary(self): updated_before = _safe_parse_datetime(request.args.get("updated_before"), allow_empty=True) # Check and clean up query parameters - limit = 50 if limit > 50 else limit + limit = min(50, limit) uri_pattern = 
uri_pattern[:4000] From 4e4911b59068616e98fc6776e4db6ee7bb1c2510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Sun, 13 Aug 2023 23:33:55 +0000 Subject: [PATCH 028/117] Refactor: Simplify code in scripts (#33295) (cherry picked from commit 50a6385c7fed0fd5457886d0ffdd5040f6a8d511) --- .../test_kubernetes_pod_operator.py | 2 +- .../pre_commit_check_pre_commit_hooks.py | 5 +---- .../ci/pre_commit/pre_commit_json_schema.py | 2 +- .../pre_commit_update_common_sql_api_stubs.py | 22 ++++--------------- .../pre_commit_update_example_dags_paths.py | 5 ++--- .../in_container/run_migration_reference.py | 4 ++-- .../update_quarantined_test_status.py | 5 ++--- scripts/in_container/verify_providers.py | 5 ++--- 8 files changed, 15 insertions(+), 35 deletions(-) diff --git a/kubernetes_tests/test_kubernetes_pod_operator.py b/kubernetes_tests/test_kubernetes_pod_operator.py index 002394611d9a..ec6ad5e503f1 100644 --- a/kubernetes_tests/test_kubernetes_pod_operator.py +++ b/kubernetes_tests/test_kubernetes_pod_operator.py @@ -925,7 +925,7 @@ def test_pod_template_file( " creation_timestamp: null", " deletion_grace_period_seconds: null", ] - actual = [x.getMessage() for x in caplog.records if x.msg == "Starting pod:\n%s"][0].splitlines() + actual = next(x.getMessage() for x in caplog.records if x.msg == "Starting pod:\n%s").splitlines() assert actual[: len(expected_lines)] == expected_lines actual_pod = self.api_client.sanitize_for_serialization(k.pod) diff --git a/scripts/ci/pre_commit/pre_commit_check_pre_commit_hooks.py b/scripts/ci/pre_commit/pre_commit_check_pre_commit_hooks.py index 113eeb8cf347..311c660d780c 100755 --- a/scripts/ci/pre_commit/pre_commit_check_pre_commit_hooks.py +++ b/scripts/ci/pre_commit/pre_commit_check_pre_commit_hooks.py @@ -127,10 +127,7 @@ def render_template( def update_static_checks_array(hooks: dict[str, list[str]], image_hooks: list[str]): rows = [] - hook_ids = list(hooks.keys()) - hook_ids.sort() - for hook_id in hook_ids: - hook_description = hooks[hook_id] + for hook_id, hook_description in sorted(hooks.items()): formatted_hook_description = ( hook_description[0] if len(hook_description) == 1 else "* " + "\n* ".join(hook_description) ) diff --git a/scripts/ci/pre_commit/pre_commit_json_schema.py b/scripts/ci/pre_commit/pre_commit_json_schema.py index 5a82183e31da..886ff13fe8b2 100755 --- a/scripts/ci/pre_commit/pre_commit_json_schema.py +++ b/scripts/ci/pre_commit/pre_commit_json_schema.py @@ -100,7 +100,7 @@ def load_file(file_path: str): if file_path.lower().endswith(".json"): with open(file_path) as input_file: return json.load(input_file) - elif file_path.lower().endswith(".yaml") or file_path.lower().endswith(".yml"): + elif file_path.lower().endswith((".yaml", ".yml")): with open(file_path) as input_file: return yaml.safe_load(input_file) raise _ValidatorError("Unknown file format. 
Supported extension: '.yaml', '.json'") diff --git a/scripts/ci/pre_commit/pre_commit_update_common_sql_api_stubs.py b/scripts/ci/pre_commit/pre_commit_update_common_sql_api_stubs.py index e07a4f6325b7..1a02ffad6a66 100755 --- a/scripts/ci/pre_commit/pre_commit_update_common_sql_api_stubs.py +++ b/scripts/ci/pre_commit/pre_commit_update_common_sql_api_stubs.py @@ -69,25 +69,11 @@ def summarize_changes(results: list[str]) -> tuple[int, int]: """ removals, additions = 0, 0 for line in results: - if ( - line.startswith("+") - or line.startswith("[green]+") - and not ( - # Skip additions of comments in counting removals - line.startswith("+#") - or line.startswith("[green]+#") - ) - ): + if line.startswith(("+", "[green]+")) and not line.startswith(("+#", "[green]+#")): + # Skip additions of comments in counting removals additions += 1 - if ( - line.startswith("-") - or line.startswith("[red]-") - and not ( - # Skip removals of comments in counting removals - line.startswith("-#") - or line.startswith("[red]-#") - ) - ): + if line.startswith(("-", "[red]+")) and not line.startswith(("-#", "[red]+#")): + # Skip removals of comments in counting removals removals += 1 return removals, additions diff --git a/scripts/ci/pre_commit/pre_commit_update_example_dags_paths.py b/scripts/ci/pre_commit/pre_commit_update_example_dags_paths.py index 8a0a71ceb913..c7fd4aa83478 100755 --- a/scripts/ci/pre_commit/pre_commit_update_example_dags_paths.py +++ b/scripts/ci/pre_commit/pre_commit_update_example_dags_paths.py @@ -52,11 +52,10 @@ def get_provider_and_version(url_path: str) -> tuple[str, str]: provider_info = yaml.safe_load(f) version = provider_info["versions"][0] provider = "-".join(candidate_folders) - while provider.endswith("-"): - provider = provider[:-1] + provider = provider.rstrip("-") return provider, version except FileNotFoundError: - candidate_folders = candidate_folders[:-1] + candidate_folders.pop() console.print( f"[red]Bad example path: {url_path}. Missing " f"provider.yaml in any of the 'airflow/providers/{url_path}' folders. 
[/]" diff --git a/scripts/in_container/run_migration_reference.py b/scripts/in_container/run_migration_reference.py index 43692b2c458d..47b8dbed25ee 100755 --- a/scripts/in_container/run_migration_reference.py +++ b/scripts/in_container/run_migration_reference.py @@ -187,6 +187,6 @@ def ensure_filenames_are_sorted(revisions): revisions = list(reversed(list(get_revisions()))) ensure_airflow_version(revisions=revisions) revisions = list(reversed(list(get_revisions()))) - ensure_filenames_are_sorted(revisions) + ensure_filenames_are_sorted(revisions=revisions) revisions = list(get_revisions()) - update_docs(revisions) + update_docs(revisions=revisions) diff --git a/scripts/in_container/update_quarantined_test_status.py b/scripts/in_container/update_quarantined_test_status.py index 7f03de5c3284..a9c155106cd8 100755 --- a/scripts/in_container/update_quarantined_test_status.py +++ b/scripts/in_container/update_quarantined_test_status.py @@ -63,7 +63,7 @@ class TestHistory(NamedTuple): ":x:": False, } -reverse_status_map: dict[bool, str] = {status_map[key]: key for key in status_map.keys()} +reverse_status_map: dict[bool, str] = {val: key for key, val in status_map.items()} def get_url(result: TestResult) -> str: @@ -160,8 +160,7 @@ def get_history_status(history: TestHistory): def get_table(history_map: dict[str, TestHistory]) -> str: headers = ["Test", "Last run", f"Last {num_runs} runs", "Status", "Comment"] the_table: list[list[str]] = [] - for ordered_key in sorted(history_map.keys()): - history = history_map[ordered_key] + for _, history in sorted(history_map.items()): the_table.append( [ history.url, diff --git a/scripts/in_container/verify_providers.py b/scripts/in_container/verify_providers.py index 08193a62dbf9..1eda95edfc0d 100755 --- a/scripts/in_container/verify_providers.py +++ b/scripts/in_container/verify_providers.py @@ -181,7 +181,7 @@ def onerror(_): for path, prefix in walkable_paths_and_prefixes.items(): for modinfo in pkgutil.walk_packages(path=[path], prefix=prefix, onerror=onerror): - if not any(modinfo.name.startswith(provider_prefix) for provider_prefix in provider_prefixes): + if not modinfo.name.startswith(tuple(provider_prefixes)): if print_skips: console.print(f"Skipping module: {modinfo.name}") continue @@ -326,8 +326,7 @@ def get_details_about_classes( :param wrong_entities: wrong entities found for that type :param full_package_name: full package name """ - all_entities = list(entities) - all_entities.sort() + all_entities = sorted(entities) TOTALS[entity_type] += len(all_entities) return EntityTypeSummary( entities=all_entities, From d329d70632d1373ef13a5955ee927f62f6d4eee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Wed, 16 Aug 2023 09:03:57 +0000 Subject: [PATCH 029/117] Refactor str.startswith with tuples (#33292) (cherry picked from commit d747a7924bca4b1e44003fe0932c28d7b504b088) --- airflow/template/templater.py | 6 +++--- dev/chart/build_changelog_annotations.py | 2 +- .../pre_commit/pre_commit_update_providers_dependencies.py | 4 ++-- scripts/in_container/run_provider_yaml_files_check.py | 7 +++---- scripts/in_container/verify_providers.py | 6 +++--- setup.py | 2 +- 6 files changed, 13 insertions(+), 14 deletions(-) diff --git a/airflow/template/templater.py b/airflow/template/templater.py index 83c4e763f833..29952b83b02e 100644 --- a/airflow/template/templater.py +++ b/airflow/template/templater.py @@ -69,7 +69,7 @@ def resolve_template_files(self) -> None: content = getattr(self, 
field, None) if content is None: continue - elif isinstance(content, str) and any(content.endswith(ext) for ext in self.template_ext): + elif isinstance(content, str) and content.endswith(tuple(self.template_ext)): env = self.get_template_env() try: setattr(self, field, env.loader.get_source(env, content)[0]) # type: ignore @@ -78,7 +78,7 @@ def resolve_template_files(self) -> None: elif isinstance(content, list): env = self.get_template_env() for i, item in enumerate(content): - if isinstance(item, str) and any(item.endswith(ext) for ext in self.template_ext): + if isinstance(item, str) and item.endswith(tuple(self.template_ext)): try: content[i] = env.loader.get_source(env, item)[0] # type: ignore except Exception: @@ -149,7 +149,7 @@ def render_template( jinja_env = self.get_template_env() if isinstance(value, str): - if any(value.endswith(ext) for ext in self.template_ext): # A filepath. + if value.endswith(tuple(self.template_ext)): # A filepath. template = jinja_env.get_template(value) else: template = jinja_env.from_string(value) diff --git a/dev/chart/build_changelog_annotations.py b/dev/chart/build_changelog_annotations.py index 259bd7dac406..fc38a47dcbf7 100755 --- a/dev/chart/build_changelog_annotations.py +++ b/dev/chart/build_changelog_annotations.py @@ -96,7 +96,7 @@ def print_entry(section: str, description: str, pr_number: int | None): break in_first_release = True continue - if line.startswith('"""') or line.startswith("----") or line.startswith("^^^^"): + if line.startswith(('"""', "----", "^^^^")): continue # Make sure we get past "significant features" before we actually start keeping track diff --git a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py index c8ea48ec4845..09b2d0c82182 100755 --- a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py +++ b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py @@ -126,7 +126,7 @@ def get_provider_id_from_import(import_name: str, file_path: Path) -> str | None provider_id = get_provider_id_from_relative_import_or_file(relative_provider_import) if provider_id is None: relative_path_from_import = relative_provider_import.replace(".", os.sep) - if any(relative_path_from_import.startswith(suspended_path) for suspended_path in suspended_paths): + if relative_path_from_import.startswith(tuple(suspended_paths)): return None warnings.append(f"We could not determine provider id from import {import_name} in {file_path}") return provider_id @@ -154,7 +154,7 @@ def get_provider_id_from_file_name(file_path: Path) -> str | None: return None provider_id = get_provider_id_from_relative_import_or_file(str(relative_path)) if provider_id is None and file_path.name not in ["__init__.py", "get_provider_info.py"]: - if any(relative_path.as_posix().startswith(suspended_path) for suspended_path in suspended_paths): + if relative_path.as_posix().startswith(tuple(suspended_paths)): return None else: warnings.append(f"We had a problem to classify the file {file_path} to a provider") diff --git a/scripts/in_container/run_provider_yaml_files_check.py b/scripts/in_container/run_provider_yaml_files_check.py index e2caeea93b33..ae523eb4babc 100755 --- a/scripts/in_container/run_provider_yaml_files_check.py +++ b/scripts/in_container/run_provider_yaml_files_check.py @@ -419,19 +419,18 @@ def check_doc_files(yaml_files: dict[str, dict]): for f in expected_doc_files if f.name != "index.rst" and "_partials" not in f.parts - and not 
any(f.relative_to(DOCS_DIR).as_posix().startswith(s) for s in suspended_providers) + and not f.relative_to(DOCS_DIR).as_posix().startswith(tuple(suspended_providers)) } | { f"/docs/{f.relative_to(DOCS_DIR).as_posix()}" for f in DOCS_DIR.glob("apache-airflow-providers-*/operators.rst") - if not any(f.relative_to(DOCS_DIR).as_posix().startswith(s) for s in suspended_providers) + if not f.relative_to(DOCS_DIR).as_posix().startswith(tuple(suspended_providers)) } console.print("[yellow]Suspended logos:[/]") console.print(suspended_logos) expected_logo_urls = { f"/{f.relative_to(DOCS_DIR).as_posix()}" for f in DOCS_DIR.glob("integration-logos/**/*") - if f.is_file() - and not any(f"/{f.relative_to(DOCS_DIR).as_posix()}".startswith(s) for s in suspended_logos) + if f.is_file() and not f"/{f.relative_to(DOCS_DIR).as_posix()}".startswith(tuple(suspended_logos)) } try: diff --git a/scripts/in_container/verify_providers.py b/scripts/in_container/verify_providers.py index 1eda95edfc0d..78f8c0430cf1 100755 --- a/scripts/in_container/verify_providers.py +++ b/scripts/in_container/verify_providers.py @@ -164,9 +164,9 @@ def mk_prefix(provider_id): return f"{prefix}{provider_id}" if provider_ids: - provider_prefixes = [mk_prefix(provider_id) for provider_id in provider_ids] + provider_prefixes = tuple(mk_prefix(provider_id) for provider_id in provider_ids) else: - provider_prefixes = [prefix] + provider_prefixes = (prefix,) def onerror(_): nonlocal tracebacks @@ -181,7 +181,7 @@ def onerror(_): for path, prefix in walkable_paths_and_prefixes.items(): for modinfo in pkgutil.walk_packages(path=[path], prefix=prefix, onerror=onerror): - if not modinfo.name.startswith(tuple(provider_prefixes)): + if not modinfo.name.startswith(provider_prefixes): if print_skips: console.print(f"Skipping module: {modinfo.name}") continue diff --git a/setup.py b/setup.py index 00f4f7f947af..73742543b156 100644 --- a/setup.py +++ b/setup.py @@ -704,7 +704,7 @@ def is_package_excluded(package: str, exclusion_list: list[str]) -> bool: :param exclusion_list: list of excluded packages :return: true if package should be excluded """ - return any(package.startswith(excluded_package) for excluded_package in exclusion_list) + return package.startswith(tuple(exclusion_list)) def remove_provider_limits(package: str) -> str: From c597ca1e8b0b146843f228ef152da349138a9511 Mon Sep 17 00:00:00 2001 From: Augusto Hidalgo Date: Fri, 4 Aug 2023 12:24:16 -0400 Subject: [PATCH 030/117] Fix test_example_dags (#32714) By going up to `parents[3]` we were going outside the repository root, luckily(or unluckily the repo folder is also named `airflow` so the pattern `airflow/**/example_dags/example_*.py` still worked, but `tests/system/providers/**/example_*.py` wasn't being used. This discovered 2 new errors: - `example_local_to_wasb.py` was trivial to fix - `example_redis_publish.py`is more interesting: this one fails because `RedisPubSubSensor` constructor calls Redis.pubsub().subscribe(), which just hangs and DagBag fails with timeout. For now I'm just deleting this operator from the example. 
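As a quick illustration of the off-by-one described above (the checkout path below is hypothetical):

    from pathlib import Path

    # Assume the repository is checked out at /repo and the test lives at
    # /repo/tests/always/test_example_dags.py (three directories below the root).
    test_file = Path("/repo/tests/always/test_example_dags.py")

    print(test_file.parents[2])  # /repo  -> the repository root
    print(test_file.parents[3])  # /      -> one level above the repository root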
(cherry picked from commit c048bd5d9e98b05a8c2b68af0ef1853a700888f7) --- tests/always/test_example_dags.py | 8 +++++--- .../providers/redis/example_redis_publish.py | 14 +------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/tests/always/test_example_dags.py b/tests/always/test_example_dags.py index 02533b21cf54..2666a5b93a0d 100644 --- a/tests/always/test_example_dags.py +++ b/tests/always/test_example_dags.py @@ -26,7 +26,7 @@ from airflow.utils import yaml from tests.test_utils.asserts import assert_queries_count -AIRFLOW_SOURCES_ROOT = Path(__file__).resolve().parents[3] +AIRFLOW_SOURCES_ROOT = Path(__file__).resolve().parents[2] AIRFLOW_PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "airflow" / "providers" NO_DB_QUERY_EXCEPTION = ["/airflow/example_dags/example_subdag_operator.py"] @@ -54,7 +54,9 @@ def example_not_suspended_dags(): suspended_providers_folders = get_suspended_providers_folders() possible_prefixes = ["airflow/providers/", "tests/system/providers/"] suspended_providers_folders = [ - f"{prefix}{provider}" for prefix in possible_prefixes for provider in suspended_providers_folders + AIRFLOW_SOURCES_ROOT.joinpath(prefix, provider).as_posix() + for prefix in possible_prefixes + for provider in suspended_providers_folders ] for example_dir in example_dirs: candidates = glob(f"{AIRFLOW_SOURCES_ROOT.as_posix()}/{example_dir}", recursive=True) @@ -68,7 +70,7 @@ def example_dags_except_db_exception(): return [ dag_file for dag_file in example_not_suspended_dags() - if any(not dag_file.endswith(e) for e in NO_DB_QUERY_EXCEPTION) + if not any(dag_file.endswith(e) for e in NO_DB_QUERY_EXCEPTION) ] diff --git a/tests/system/providers/redis/example_redis_publish.py b/tests/system/providers/redis/example_redis_publish.py index e524a899afbf..4216b862d4a4 100644 --- a/tests/system/providers/redis/example_redis_publish.py +++ b/tests/system/providers/redis/example_redis_publish.py @@ -33,7 +33,6 @@ from airflow import DAG from airflow.providers.redis.operators.redis_publish import RedisPublishOperator from airflow.providers.redis.sensors.redis_key import RedisKeySensor -from airflow.providers.redis.sensors.redis_pub_sub import RedisPubSubSensor # [END import_module] # [START instantiate_dag] @@ -59,17 +58,6 @@ # [END RedisPublishOperator_DAG] - # [START RedisPubSubSensor_DAG] - pubsub_sensor_task = RedisPubSubSensor( - task_id="pubsub_sensor_task", - redis_conn_id="redis_default", - channels="your_channel", - dag=dag, - timeout=600, - poke_interval=30, - ) - # [END RedisPubSubSensor_DAG] - # [START RedisKeySensor_DAG] key_sensor_task = RedisKeySensor( task_id="key_sensor_task", @@ -81,7 +69,7 @@ ) # [END RedisKeySensor_DAG] - publish_task >> pubsub_sensor_task >> key_sensor_task + publish_task >> key_sensor_task from tests.system.utils.watcher import watcher From 75bf02ebd34b8192b4f28474a96ef4098a58b2b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Sat, 12 Aug 2023 18:44:35 +0000 Subject: [PATCH 031/117] Refactor: Simplify code in tests (#33293) (cherry picked from commit d2c0bbebacd1c58d357d647707a1bdb0c5b3b425) --- tests/always/test_example_dags.py | 7 +++---- tests/always/test_project_structure.py | 4 ++-- tests/cli/commands/test_config_command.py | 4 +--- tests/decorators/test_python.py | 2 +- tests/jobs/test_scheduler_job.py | 4 ++-- tests/models/test_dagrun.py | 2 +- tests/operators/test_python.py | 8 ++++---- tests/serialization/test_dag_serialization.py | 2 +- 
tests/system/providers/amazon/aws/example_ec2.py | 3 +-- tests/system/providers/amazon/aws/utils/ec2.py | 2 +- tests/test_utils/providers.py | 2 +- tests/www/test_utils.py | 6 +++--- tests/www/views/test_views_acl.py | 2 +- tests/www/views/test_views_home.py | 6 +++--- 14 files changed, 25 insertions(+), 29 deletions(-) diff --git a/tests/always/test_example_dags.py b/tests/always/test_example_dags.py index 2666a5b93a0d..9e3f7b409917 100644 --- a/tests/always/test_example_dags.py +++ b/tests/always/test_example_dags.py @@ -61,16 +61,15 @@ def example_not_suspended_dags(): for example_dir in example_dirs: candidates = glob(f"{AIRFLOW_SOURCES_ROOT.as_posix()}/{example_dir}", recursive=True) for candidate in candidates: - if any(candidate.startswith(s) for s in suspended_providers_folders): - continue - yield candidate + if not candidate.startswith(tuple(suspended_providers_folders)): + yield candidate def example_dags_except_db_exception(): return [ dag_file for dag_file in example_not_suspended_dags() - if not any(dag_file.endswith(e) for e in NO_DB_QUERY_EXCEPTION) + if not dag_file.endswith(tuple(NO_DB_QUERY_EXCEPTION)) ] diff --git a/tests/always/test_project_structure.py b/tests/always/test_project_structure.py index b17e285bdb33..116f8f99d2d5 100644 --- a/tests/always/test_project_structure.py +++ b/tests/always/test_project_structure.py @@ -150,7 +150,7 @@ def get_classes_from_file(self, filepath: str): if not isinstance(current_node, ast.ClassDef): continue name = current_node.name - if not any(name.endswith(suffix) for suffix in self.CLASS_SUFFIXES): + if not name.endswith(tuple(self.CLASS_SUFFIXES)): continue results[f"{module}.{name}"] = current_node return results @@ -463,6 +463,6 @@ def test_no_illegal_suffixes(self): ) ) - invalid_files = [f for f in files if any(f.endswith(suffix) for suffix in illegal_suffixes)] + invalid_files = [f for f in files if f.endswith(tuple(illegal_suffixes))] assert [] == invalid_files diff --git a/tests/cli/commands/test_config_command.py b/tests/cli/commands/test_config_command.py index 59d04de860b6..c1e2924bfc27 100644 --- a/tests/cli/commands/test_config_command.py +++ b/tests/cli/commands/test_config_command.py @@ -194,9 +194,7 @@ def test_cli_comment_out_everything(self): ) output = temp_stdout.getvalue() lines = output.split("\n") - assert all( - line.startswith("#") or line.strip() == "" or line.startswith("[") for line in lines if line - ) + assert all(not line.strip() or line.startswith(("#", "[")) for line in lines if line) class TestCliConfigGetValue: diff --git a/tests/decorators/test_python.py b/tests/decorators/test_python.py index cd69759995a7..f1c84721b4ef 100644 --- a/tests/decorators/test_python.py +++ b/tests/decorators/test_python.py @@ -462,7 +462,7 @@ def add_num(number: int, num2: int = 2): bigger_number.operator.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE) ret.operator.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE) - ti_add_num = [ti for ti in dr.get_task_instances() if ti.task_id == "add_num"][0] + ti_add_num = next(ti for ti in dr.get_task_instances() if ti.task_id == "add_num") assert ti_add_num.xcom_pull(key=ret.key) == (test_number + 2) * 2 def test_dag_task(self): diff --git a/tests/jobs/test_scheduler_job.py b/tests/jobs/test_scheduler_job.py index ed09d9cd1820..cffea246e460 100644 --- a/tests/jobs/test_scheduler_job.py +++ b/tests/jobs/test_scheduler_job.py @@ -3123,7 +3123,7 @@ def test_list_py_file_paths(self): } for root, _, files in os.walk(TEST_DAG_FOLDER): for file_name in files: - if 
file_name.endswith(".py") or file_name.endswith(".zip"): + if file_name.endswith((".py", ".zip")): if file_name not in ignored_files: expected_files.add(f"{root}/{file_name}") for file_path in list_py_file_paths(TEST_DAG_FOLDER, include_examples=False): @@ -3136,7 +3136,7 @@ def test_list_py_file_paths(self): example_dag_folder = airflow.example_dags.__path__[0] for root, _, files in os.walk(example_dag_folder): for file_name in files: - if file_name.endswith(".py") or file_name.endswith(".zip"): + if file_name.endswith((".py", ".zip")): if file_name not in ["__init__.py"] and file_name not in ignored_files: expected_files.add(os.path.join(root, file_name)) detected_files.clear() diff --git a/tests/models/test_dagrun.py b/tests/models/test_dagrun.py index c90ec1aef328..5d809333c82e 100644 --- a/tests/models/test_dagrun.py +++ b/tests/models/test_dagrun.py @@ -1706,7 +1706,7 @@ def task_2(arg2): (1, State.NONE), (2, State.NONE), ] - ti1 = [i for i in tis if i.map_index == 0][0] + ti1 = next(i for i in tis if i.map_index == 0) # Now "clear" and "reduce" the length to empty list dag.clear() Variable.set(key="arg1", value=[]) diff --git a/tests/operators/test_python.py b/tests/operators/test_python.py index 69e68838e75f..28c70537ae7e 100644 --- a/tests/operators/test_python.py +++ b/tests/operators/test_python.py @@ -1383,7 +1383,7 @@ def test_short_circuit_with_teardowns( op1.skip = MagicMock() dagrun = dag_maker.create_dagrun() tis = dagrun.get_task_instances() - ti: TaskInstance = [x for x in tis if x.task_id == "op1"][0] + ti: TaskInstance = next(x for x in tis if x.task_id == "op1") ti._run_raw_task() expected_tasks = {dag.task_dict[x] for x in expected} if should_skip: @@ -1414,7 +1414,7 @@ def test_short_circuit_with_teardowns_complicated(self, dag_maker, config): op1.skip = MagicMock() dagrun = dag_maker.create_dagrun() tis = dagrun.get_task_instances() - ti: TaskInstance = [x for x in tis if x.task_id == "op1"][0] + ti: TaskInstance = next(x for x in tis if x.task_id == "op1") ti._run_raw_task() # we can't use assert_called_with because it's a set and therefore not ordered actual_skipped = set(op1.skip.call_args.kwargs["tasks"]) @@ -1441,7 +1441,7 @@ def test_short_circuit_with_teardowns_complicated_2(self, dag_maker): op1.skip = MagicMock() dagrun = dag_maker.create_dagrun() tis = dagrun.get_task_instances() - ti: TaskInstance = [x for x in tis if x.task_id == "op1"][0] + ti: TaskInstance = next(x for x in tis if x.task_id == "op1") ti._run_raw_task() # we can't use assert_called_with because it's a set and therefore not ordered actual_kwargs = op1.skip.call_args.kwargs @@ -1476,7 +1476,7 @@ def test_short_circuit_with_teardowns_debug_level(self, dag_maker, level, clear_ op1.skip = MagicMock() dagrun = dag_maker.create_dagrun() tis = dagrun.get_task_instances() - ti: TaskInstance = [x for x in tis if x.task_id == "op1"][0] + ti: TaskInstance = next(x for x in tis if x.task_id == "op1") ti._run_raw_task() # we can't use assert_called_with because it's a set and therefore not ordered actual_kwargs = op1.skip.call_args.kwargs diff --git a/tests/serialization/test_dag_serialization.py b/tests/serialization/test_dag_serialization.py index 4616bd30d2a4..5338579e0104 100644 --- a/tests/serialization/test_dag_serialization.py +++ b/tests/serialization/test_dag_serialization.py @@ -454,7 +454,7 @@ def sorted_serialized_dag(dag_dict: dict): items should not matter but assertEqual would fail if the order of items changes in the dag dictionary """ - dag_dict["dag"]["tasks"] = 
sorted(dag_dict["dag"]["tasks"], key=lambda x: sorted(x.keys())) + dag_dict["dag"]["tasks"] = sorted(dag_dict["dag"]["tasks"], key=sorted) dag_dict["dag"]["_access_control"]["__var"]["test_role"]["__var"] = sorted( dag_dict["dag"]["_access_control"]["__var"]["test_role"]["__var"] ) diff --git a/tests/system/providers/amazon/aws/example_ec2.py b/tests/system/providers/amazon/aws/example_ec2.py index 1dd98488c299..dfd69c74bce8 100644 --- a/tests/system/providers/amazon/aws/example_ec2.py +++ b/tests/system/providers/amazon/aws/example_ec2.py @@ -57,8 +57,7 @@ def get_latest_ami_id(): Owners=["amazon"], ) # Sort on CreationDate - sorted_images = sorted(images["Images"], key=itemgetter("CreationDate"), reverse=True) - return sorted_images[0]["ImageId"] + return max(images["Images"], key=itemgetter("CreationDate"))["ImageId"] @task diff --git a/tests/system/providers/amazon/aws/utils/ec2.py b/tests/system/providers/amazon/aws/utils/ec2.py index c90418538fc7..c2d141137473 100644 --- a/tests/system/providers/amazon/aws/utils/ec2.py +++ b/tests/system/providers/amazon/aws/utils/ec2.py @@ -40,7 +40,7 @@ def _get_next_available_cidr(vpc_id: str) -> str: if len({block.prefixlen for block in existing_cidr_blocks}) > 1: raise ValueError(error_msg_template.format("Subnets do not all use the same CIDR block size.")) - last_used_block = sorted(existing_cidr_blocks)[-1] + last_used_block = max(existing_cidr_blocks) *_, last_reserved_ip = last_used_block return f"{last_reserved_ip + 1}/{last_used_block.prefixlen}" diff --git a/tests/test_utils/providers.py b/tests/test_utils/providers.py index 7d6770b81a2e..99f64a684954 100644 --- a/tests/test_utils/providers.py +++ b/tests/test_utils/providers.py @@ -53,6 +53,6 @@ def get_provider_min_airflow_version(provider_name): p = ProvidersManager() deps = p.providers[provider_name].data["dependencies"] - airflow_dep = [x for x in deps if x.startswith("apache-airflow")][0] + airflow_dep = next(x for x in deps if x.startswith("apache-airflow")) min_airflow_version = tuple(map(int, airflow_dep.split(">=")[1].split("."))) return min_airflow_version diff --git a/tests/www/test_utils.py b/tests/www/test_utils.py index 1dd1665a9bc0..46fa055e8c51 100644 --- a/tests/www/test_utils.py +++ b/tests/www/test_utils.py @@ -133,7 +133,7 @@ def test_params_search(self): def test_params_none_and_zero(self): query_str = utils.get_params(a=0, b=None, c="true") # The order won't be consistent, but that doesn't affect behaviour of a browser - pairs = list(sorted(query_str.split("&"))) + pairs = sorted(query_str.split("&")) assert ["a=0", "c=true"] == pairs def test_params_all(self): @@ -429,11 +429,11 @@ def test_dag_run_custom_sqla_interface_delete_no_collateral_damage(dag_maker, se assert len(set(x.run_id for x in dag_runs)) == 3 run_id_for_single_delete = "scheduled__2023-01-01T00:00:00+00:00" # we have 3 runs with this same run_id - assert len(list(x for x in dag_runs if x.run_id == run_id_for_single_delete)) == 3 + assert sum(1 for x in dag_runs if x.run_id == run_id_for_single_delete) == 3 # each is a different dag # if we delete one, it shouldn't delete the others - one_run = [x for x in dag_runs if x.run_id == run_id_for_single_delete][0] + one_run = next(x for x in dag_runs if x.run_id == run_id_for_single_delete) assert interface.delete(item=one_run) is True session.commit() dag_runs = session.query(DagRun).all() diff --git a/tests/www/views/test_views_acl.py b/tests/www/views/test_views_acl.py index 30e2a975c4cf..ac2821188ce3 100644 --- a/tests/www/views/test_views_acl.py +++ 
b/tests/www/views/test_views_acl.py @@ -336,7 +336,7 @@ def client_all_dags_dagruns(acl_app, user_all_dags_dagruns): def test_dag_stats_success(client_all_dags_dagruns): resp = client_all_dags_dagruns.post("dag_stats", follow_redirects=True) check_content_in_response("example_bash_operator", resp) - assert set(list(resp.json.items())[0][1][0].keys()) == {"state", "count"} + assert set(next(iter(resp.json.items()))[1][0].keys()) == {"state", "count"} def test_task_stats_failure(dag_test_client): diff --git a/tests/www/views/test_views_home.py b/tests/www/views/test_views_home.py index 852514989974..874596b682ed 100644 --- a/tests/www/views/test_views_home.py +++ b/tests/www/views/test_views_home.py @@ -154,7 +154,7 @@ def working_dags(tmpdir): dag_contents_template = "from airflow import DAG\ndag = DAG('{}', tags=['{}'])" with create_session() as session: - for dag_id, tag in list(zip(TEST_FILTER_DAG_IDS, TEST_TAGS)): + for dag_id, tag in zip(TEST_FILTER_DAG_IDS, TEST_TAGS): filename = os.path.join(tmpdir, f"{dag_id}.py") with open(filename, "w") as f: f.writelines(dag_contents_template.format(dag_id, tag)) @@ -169,7 +169,7 @@ def working_dags_with_read_perm(tmpdir): "access_control={{'role_single_dag':{{'can_read'}}}}) " ) with create_session() as session: - for dag_id, tag in list(zip(TEST_FILTER_DAG_IDS, TEST_TAGS)): + for dag_id, tag in zip(TEST_FILTER_DAG_IDS, TEST_TAGS): filename = os.path.join(tmpdir, f"{dag_id}.py") if dag_id == "filter_test_1": with open(filename, "w") as f: @@ -188,7 +188,7 @@ def working_dags_with_edit_perm(tmpdir): "access_control={{'role_single_dag':{{'can_edit'}}}}) " ) with create_session() as session: - for dag_id, tag in list(zip(TEST_FILTER_DAG_IDS, TEST_TAGS)): + for dag_id, tag in zip(TEST_FILTER_DAG_IDS, TEST_TAGS): filename = os.path.join(tmpdir, f"{dag_id}.py") if dag_id == "filter_test_1": with open(filename, "w") as f: From 40743706d13ed8600df73e244b95493a31d87240 Mon Sep 17 00:00:00 2001 From: "D. Ferruzzi" Date: Fri, 11 Aug 2023 13:15:01 -0700 Subject: [PATCH 032/117] D205 Support - Root files (#33297) * D205 Support - Root files Updates setup.py, airflow/configuration.py, and airflow/exceptions.py * missed one (cherry picked from commit 4755fe40350c83fafef4525bf47040e1a1f0f07a) --- airflow/configuration.py | 24 +++++++++++------ airflow/exceptions.py | 5 +--- setup.py | 58 +++++++++++++++++++++++++--------------- 3 files changed, 53 insertions(+), 34 deletions(-) diff --git a/airflow/configuration.py b/airflow/configuration.py index 213f34423064..2da495ba0e9e 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -240,6 +240,7 @@ def is_template(self, section: str, key) -> bool: def _update_defaults_from_string(self, config_string: str): """ The defaults in _default_values are updated based on values in config_string ("ini" format). + Note that those values are not validated and cannot contain variables because we are using regular config parser to load them. This method is used to test the config parser in unit tests. @@ -477,8 +478,9 @@ def get_sections_including_defaults(self) -> list[str]: def get_options_including_defaults(self, section: str) -> list[str]: """ - Retrieves all possible option from the configuration parser for the section given, - including options defined by built-in defaults. + Retrieves all possible option from the configuration parser for the section given. + + Includes options defined by built-in defaults. 
:return: list of option names for the section given """ @@ -489,6 +491,7 @@ def get_options_including_defaults(self, section: str) -> list[str]: def optionxform(self, optionstr: str) -> str: """ This method transforms option names on every read, get, or set operation. + This changes from the default behaviour of ConfigParser from lower-casing to instead be case-preserving. @@ -500,8 +503,10 @@ def optionxform(self, optionstr: str) -> str: @contextmanager def make_sure_configuration_loaded(self, with_providers: bool) -> Generator[None, None, None]: """ - Make sure configuration is loaded with or without providers, regardless if the provider configuration - has been loaded before or not. Restores configuration to the state before entering the context. + Make sure configuration is loaded with or without providers. + + This happens regardless if the provider configuration has been loaded before or not. + Restores configuration to the state before entering the context. :param with_providers: whether providers should be loaded """ @@ -729,6 +734,7 @@ def validate(self): def _validate_max_tis_per_query(self) -> None: """ Check if config ``scheduler.max_tis_per_query`` is not greater than ``core.parallelism``. + If not met, a warning message is printed to guide the user to correct it. More info: https://github.com/apache/airflow/pull/32572 @@ -1941,10 +1947,12 @@ def create_default_config_parser(configuration_description: dict[str, dict[str, def create_pre_2_7_defaults() -> ConfigParser: """ - Creates parser using the old defaults from Airflow < 2.7.0, in order to be able to fall-back to those - defaults when old version of provider, not supporting "config contribution" is installed with Airflow - 2.7.0+. This "default" configuration does not support variable expansion, those are pretty much - hard-coded defaults we want to fall-back to in such case. + Creates parser using the old defaults from Airflow < 2.7.0. + + This is used in order to be able to fall-back to those defaults when old version of provider, + not supporting "config contribution" is installed with Airflow 2.7.0+. This "default" + configuration does not support variable expansion, those are pretty much hard-coded defaults ' + we want to fall-back to in such case. """ config_parser = ConfigParser() config_parser.read(_default_config_file_path("pre_2_7_defaults.cfg")) diff --git a/airflow/exceptions.py b/airflow/exceptions.py index fe0c4e416bcd..b471297cd959 100644 --- a/airflow/exceptions.py +++ b/airflow/exceptions.py @@ -176,10 +176,7 @@ class AirflowClusterPolicySkipDag(AirflowException): class AirflowClusterPolicyError(AirflowException): - """ - Raise when there is an error in Cluster Policy, - except AirflowClusterPolicyViolation and AirflowClusterPolicySkipDag. - """ + """Raise for a Cluster Policy other than AirflowClusterPolicyViolation or AirflowClusterPolicySkipDag.""" class AirflowTimetableInvalid(AirflowException): diff --git a/setup.py b/setup.py index 73742543b156..ad1ca9d44175 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,8 @@ def apply_pypi_suffix_to_airflow_packages(dependencies: list[str]) -> None: """ + Apply version suffix to dependencies that do not have one. + Looks through the list of dependencies, finds which one are airflow or airflow providers packages and applies the version suffix to those of them that do not have the suffix applied yet. @@ -130,6 +132,7 @@ def airflow_test_suite() -> unittest.TestSuite: class CleanCommand(Command): """ Command to tidy up the project root. 
+ Registered as cmdclass in setup() so it can be called with ``python setup.py extra_clean``. """ @@ -166,6 +169,7 @@ def run(self) -> None: class CompileAssets(Command): """ Compile and build the frontend assets using yarn and webpack. + Registered as cmdclass in setup() so it can be called with ``python setup.py compile_assets``. """ @@ -187,7 +191,8 @@ def run(self) -> None: class ListExtras(Command): """ - List all available extras + List all available extras. + Registered as cmdclass in setup() so it can be called with ``python setup.py list_extras``. """ @@ -207,11 +212,12 @@ def run(self) -> None: def git_version() -> str: """ - Return a version to identify the state of the underlying git repo. The version will - indicate whether the head of the current git-backed working directory is tied to a - release tag or not : it will indicate the former with a 'release:{version}' prefix - and the latter with a '.dev0' suffix. Following the prefix will be a sha of the current - branch head. Finally, a "dirty" suffix is appended to indicate that uncommitted + Return a version to identify the state of the underlying git repo. + + The version will indicate whether the head of the current git-backed working directory + is tied to a release tag or not : it will indicate the former with a 'release:{version}' + prefix and the latter with a '.dev0' suffix. Following the prefix will be a sha of the + current branch head. Finally, a "dirty" suffix is appended to indicate that uncommitted changes are present. :return: Found Airflow version in Git repo @@ -596,8 +602,9 @@ def add_additional_extras() -> None: def add_extras_for_all_deprecated_aliases() -> None: """ - Add extras for all deprecated aliases. Requirements for those deprecated aliases are the same - as the extras they are replaced with. + Add extras for all deprecated aliases. + + Requirements for those deprecated aliases are the same as the extras they are replaced with. The dependencies are not copies - those are the same lists as for the new extras. This is intended. Thanks to that if the original extras are later extended with providers, aliases are extended as well. """ @@ -610,8 +617,7 @@ def add_extras_for_all_deprecated_aliases() -> None: def add_all_deprecated_provider_packages() -> None: """ - For deprecated aliases that are providers, we will swap the providers dependencies to instead - be the provider itself. + For deprecated aliases that are providers, swap the providers dependencies to be the provider itself. e.g. {"kubernetes": ["kubernetes>=3.0.0, <12.0.0", ...]} becomes {"kubernetes": ["apache-airflow-provider-cncf-kubernetes"]} @@ -744,6 +750,7 @@ def remove_provider_limits(package: str) -> str: def sort_extras_dependencies() -> dict[str, list[str]]: """ The dictionary order remains when keys() are retrieved. + Sort both: extras and list of dependencies to make it easier to analyse problems external packages will be first, then if providers are added they are added at the end of the lists. """ @@ -812,8 +819,8 @@ def __init__(self, attrs=None): def parse_config_files(self, *args, **kwargs) -> None: """ - Ensure that when we have been asked to install providers from sources - that we don't *also* try to install those providers from PyPI. + When asked to install providers from sources, ensure we don't *also* try to install from PyPI. + Also we should make sure that in this case we copy provider.yaml files so that Providers manager can find package information. 
""" @@ -837,11 +844,13 @@ def parse_config_files(self, *args, **kwargs) -> None: def replace_extra_dependencies_with_provider_packages(extra: str, providers: list[str]) -> None: """ - Replaces extra dependencies with provider package. The intention here is that when - the provider is added as dependency of extra, there is no need to add the dependencies - separately. This is not needed and even harmful, because in case of future versions of - the provider, the dependencies might change, so hard-coding dependencies from the version - that was available at the release time might cause dependency conflicts in the future. + Replaces extra dependencies with provider package. + + The intention here is that when the provider is added as dependency of extra, there is no + need to add the dependencies separately. This is not needed and even harmful, because in + case of future versions of the provider, the dependencies might change, so hard-coding + dependencies from the version that was available at the release time might cause dependency + conflicts in the future. Say for example that you have salesforce provider with those deps: @@ -888,9 +897,11 @@ def replace_extra_dependencies_with_provider_packages(extra: str, providers: lis def add_provider_packages_to_extra_dependencies(extra: str, providers: list[str]) -> None: """ - Adds provider packages as dependencies to extra. This is used to add provider packages as dependencies - to the "bulk" kind of extras. Those bulk extras do not have the detailed 'extra' dependencies as - initial values, so instead of replacing them (see previous function) we can extend them. + Adds provider packages as dependencies to extra. + + This is used to add provider packages as dependencies to the "bulk" kind of extras. + Those bulk extras do not have the detailed 'extra' dependencies as initial values, + so instead of replacing them (see previous function) we can extend them. :param extra: Name of the extra to add providers to :param providers: list of provider ids @@ -902,6 +913,8 @@ def add_provider_packages_to_extra_dependencies(extra: str, providers: list[str] def add_all_provider_packages() -> None: """ + Add extra dependencies when providers are installed from packages. + In case of regular installation (providers installed from packages), we should add extra dependencies to Airflow - to get the providers automatically installed when those extras are installed. @@ -965,8 +978,9 @@ def do_setup() -> None: def include_provider_namespace_packages_when_installing_from_sources() -> None: """ - When installing providers from sources we install all namespace packages found below airflow, - including airflow and provider packages, otherwise defaults from setup.cfg control this. + When installing providers from sources we install all namespace packages found below airflow. + + Includes airflow and provider packages, otherwise defaults from setup.cfg control this. The kwargs in setup() call override those that are specified in setup.cfg. """ if os.getenv(INSTALL_PROVIDERS_FROM_SOURCES) == "true": From a33cbf0355678ea7b8da764bd83ad2eed912fcc5 Mon Sep 17 00:00:00 2001 From: "D. 
Ferruzzi" Date: Sun, 13 Aug 2023 15:48:21 -0700 Subject: [PATCH 033/117] D205 Support - WWW (#33298) * D205 Support - WWW * fix broken link (cherry picked from commit 64c2a56faf31d5f7e38e7b55e9eb4c38ced54c59) --- airflow/www/api/experimental/endpoints.py | 13 +++--- airflow/www/decorators.py | 3 +- airflow/www/extensions/init_appbuilder.py | 12 +++--- airflow/www/extensions/init_dagbag.py | 5 ++- airflow/www/extensions/init_robots.py | 6 ++- airflow/www/extensions/init_security.py | 5 ++- airflow/www/fab_security/manager.py | 24 ++++++------ airflow/www/fab_security/sqla/manager.py | 11 +++--- airflow/www/forms.py | 5 +-- airflow/www/security.py | 25 ++++++++---- airflow/www/utils.py | 25 ++++++------ airflow/www/views.py | 48 ++++++++++++++--------- 12 files changed, 101 insertions(+), 81 deletions(-) diff --git a/airflow/www/api/experimental/endpoints.py b/airflow/www/api/experimental/endpoints.py index 2af00eeee616..ccf024543f32 100644 --- a/airflow/www/api/experimental/endpoints.py +++ b/airflow/www/api/experimental/endpoints.py @@ -58,8 +58,10 @@ def decorated(*args, **kwargs): def add_deprecation_headers(response: Response): """ - Add `Deprecation HTTP Header Field - `__. + Add Deprecation HTTP Header Field. + + .. seealso:: IETF proposal for the header field + `here `_. """ response.headers["Deprecation"] = "true" doc_url = get_docs_url("upgrading-to-2.html#migration-guide-from-experimental-api-to-stable-api-v1") @@ -79,10 +81,7 @@ def add_deprecation_headers(response: Response): @api_experimental.route("/dags//dag_runs", methods=["POST"]) @requires_authentication def trigger_dag(dag_id): - """ - Trigger a new dag run for a Dag with an execution date of now unless - specified in the data. - """ + """Trigger a new dag run for a Dag with an execution date of now unless specified in the data.""" data = request.get_json(force=True) run_id = None @@ -251,6 +250,7 @@ def dag_is_paused(dag_id): def task_instance_info(dag_id, execution_date, task_id): """ Returns a JSON with a task instance's public instance variables. + The format for the exec_date is expected to be "YYYY-mm-DDTHH:MM:SS", for example: "2016-11-16T11:34:15". This will of course need to have been encoded for URL in the request. @@ -287,6 +287,7 @@ def task_instance_info(dag_id, execution_date, task_id): def dag_run_status(dag_id, execution_date): """ Returns a JSON with a dag_run's public instance variables. + The format for the exec_date is expected to be "YYYY-mm-DDTHH:MM:SS", for example: "2016-11-16T11:34:15". This will of course need to have been encoded for URL in the request. diff --git a/airflow/www/decorators.py b/airflow/www/decorators.py index c74be2635e1f..975910fe5077 100644 --- a/airflow/www/decorators.py +++ b/airflow/www/decorators.py @@ -41,9 +41,10 @@ def _mask_variable_fields(extra_fields): """ + Mask the 'val_content' field if 'key_content' is in the mask list. + The variable requests values and args comes in this form: [('key', 'key_content'),('val', 'val_content'), ('description', 'description_content')] - So we need to mask the 'val_content' field if 'key_content' is in the mask list. """ result = [] keyname = None diff --git a/airflow/www/extensions/init_appbuilder.py b/airflow/www/extensions/init_appbuilder.py index 11c358abb62e..9c2948e32490 100644 --- a/airflow/www/extensions/init_appbuilder.py +++ b/airflow/www/extensions/init_appbuilder.py @@ -71,6 +71,7 @@ def dynamic_class_import(class_path): class AirflowAppBuilder: """ This is the base class for all the framework. 
+ This is where you will register all your views and create the menu structure. Will hold your flask app object, all your views, and security classes. @@ -235,10 +236,7 @@ def _init_extension(self, app): app.extensions["appbuilder"] = self def _swap_url_filter(self): - """ - Use our url filtering util function so there is consistency between - FAB and Airflow routes. - """ + """Use our url filtering util function so there is consistency between FAB and Airflow routes.""" from flask_appbuilder.security import views as fab_sec_views from airflow.www.views import get_safe_url @@ -537,9 +535,9 @@ def add_separator(self, category, cond=None): def add_view_no_menu(self, baseview, endpoint=None, static_folder=None): """ - Add your views without creating a menu. - :param baseview: - A BaseView type class instantiated. + Add your views without creating a menu. + + :param baseview: A BaseView type class instantiated. """ baseview = self._check_and_init(baseview) log.info(LOGMSG_INF_FAB_ADD_VIEW.format(baseview.__class__.__name__, "")) diff --git a/airflow/www/extensions/init_dagbag.py b/airflow/www/extensions/init_dagbag.py index 0a736b424a06..1cab2bff6bb5 100644 --- a/airflow/www/extensions/init_dagbag.py +++ b/airflow/www/extensions/init_dagbag.py @@ -24,8 +24,9 @@ def init_dagbag(app): """ - Create global DagBag for webserver and API. To access it use - ``flask.current_app.dag_bag``. + Create global DagBag for webserver and API. + + To access it use ``flask.current_app.dag_bag``. """ if os.environ.get("SKIP_DAGS_PARSING") == "True": app.dag_bag = DagBag(os.devnull, include_examples=False) diff --git a/airflow/www/extensions/init_robots.py b/airflow/www/extensions/init_robots.py index 3f7fb4855628..c5f037db3ddb 100644 --- a/airflow/www/extensions/init_robots.py +++ b/airflow/www/extensions/init_robots.py @@ -23,8 +23,10 @@ def init_robots(app): """ - Add X-Robots-Tag header. Use it to avoid search engines indexing airflow. This mitigates some - of the risk associated with exposing Airflow to the public internet, however it does not + Add X-Robots-Tag header. + + Use it to avoid search engines indexing airflow. This mitigates some of the risk + associated with exposing Airflow to the public internet, however it does not address the real security risks associated with such a deployment. See also: https://developers.google.com/search/docs/advanced/robots/robots_meta_tag#xrobotstag diff --git a/airflow/www/extensions/init_security.py b/airflow/www/extensions/init_security.py index ea3c2211c94f..41a8dc6afc17 100644 --- a/airflow/www/extensions/init_security.py +++ b/airflow/www/extensions/init_security.py @@ -30,8 +30,9 @@ def init_xframe_protection(app): """ - Add X-Frame-Options header. Use it to avoid click-jacking attacks, by ensuring that their content is not - embedded into other sites. + Add X-Frame-Options header. + + Use it to avoid click-jacking attacks, by ensuring that their content is not embedded into other sites. See also: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options """ diff --git a/airflow/www/fab_security/manager.py b/airflow/www/fab_security/manager.py index fca6a3083ceb..91fb58a59391 100644 --- a/airflow/www/fab_security/manager.py +++ b/airflow/www/fab_security/manager.py @@ -80,10 +80,7 @@ def _oauth_tokengetter(token=None): - """ - Default function to return the current user oauth token - from session cookie. 
- """ + """Default function to return the current user oauth token from session cookie.""" token = session.get("oauth") log.debug("Token Get: %s", token) return token @@ -470,11 +467,10 @@ def current_user(self): def oauth_user_info_getter(self, f): """ - Decorator function to be the OAuth user info getter - for all the providers, receives provider and response - return a dict with the information returned from the provider. - The returned user info dict should have it's keys with the same - name as the User Model. + Decorator function to be the OAuth user info getter for all the providers. + + Receives provider and response return a dict with the information returned from the provider. + The returned user info dict should have it's keys with the same name as the User Model. Use it like this an example for GitHub :: @@ -500,8 +496,9 @@ def wraps(provider, response=None): def get_oauth_token_key_name(self, provider): """ - Returns the token_key name for the oauth provider - if none is configured defaults to oauth_token + Returns the token_key name for the oauth provider. + + If none is configured defaults to oauth_token this is configured using OAUTH_PROVIDERS and token_key key. """ for _provider in self.oauth_providers: @@ -1504,8 +1501,9 @@ def create_permission(self, action_name: str, resource_name: str) -> Permission def delete_permission(self, action_name: str, resource_name: str) -> None: """ - Deletes the permission linking an action->resource pair. Doesn't delete the - underlying action or resource. + Deletes the permission linking an action->resource pair. + + Doesn't delete the underlying action or resource. :param action_name: Name of existing action :param resource_name: Name of existing resource diff --git a/airflow/www/fab_security/sqla/manager.py b/airflow/www/fab_security/sqla/manager.py index 6ce9580c2927..83e2119492e6 100644 --- a/airflow/www/fab_security/sqla/manager.py +++ b/airflow/www/fab_security/sqla/manager.py @@ -42,8 +42,7 @@ class SecurityManager(BaseSecurityManager): """ - Responsible for authentication, registering security views, - role and permission auto management. + Responsible for authentication, registering security views, role and permission auto management. If you want to change anything just inherit and override, then pass your own security manager to AppBuilder. @@ -281,8 +280,7 @@ def permission_exists_in_one_or_more_roles( self, resource_name: str, action_name: str, role_ids: list[int] ) -> bool: """ - Method to efficiently check if a certain permission exists - on a list of role id's. This is used by `has_access`. + Efficiently check if a certain permission exists on a list of role ids; used by `has_access`. :param resource_name: The view's name to check if exists on one of the roles :param action_name: The permission name to check if exists @@ -507,8 +505,9 @@ def create_permission(self, action_name, resource_name) -> Permission | None: def delete_permission(self, action_name: str, resource_name: str) -> None: """ - Deletes the permission linking an action->resource pair. Doesn't delete the - underlying action or resource. + Deletes the permission linking an action->resource pair. + + Doesn't delete the underlying action or resource. 
:param action_name: Name of existing action :param resource_name: Name of existing resource diff --git a/airflow/www/forms.py b/airflow/www/forms.py index 4a0213945c86..61b203804a50 100644 --- a/airflow/www/forms.py +++ b/airflow/www/forms.py @@ -100,10 +100,7 @@ class DateTimeForm(FlaskForm): class DateTimeWithNumRunsForm(FlaskForm): - """ - Date time and number of runs form for tree view, task duration - and landing times. - """ + """Date time and number of runs form for tree view, task duration and landing times.""" base_date = DateTimeWithTimezoneField( "Anchor date", widget=AirflowDateTimePickerWidget(), default=timezone.utcnow() diff --git a/airflow/www/security.py b/airflow/www/security.py index 98384839aba8..8f119fb680f9 100644 --- a/airflow/www/security.py +++ b/airflow/www/security.py @@ -1,4 +1,3 @@ -# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -454,8 +453,9 @@ def is_dag_resource(self, resource_name: str) -> bool: def has_access(self, action_name: str, resource_name: str, user=None) -> bool: """ - Verify whether a given user could perform a certain action - (e.g can_read, can_write, can_delete) on the given resource. + Verify whether a given user could perform a certain action on the given resource. + + Example actions might include can_read, can_write, can_delete, etc. :param action_name: action_name on resource (e.g can_read, can_edit). :param resource_name: name of view-menu or resource. @@ -483,7 +483,8 @@ def _has_role(self, role_name_or_list: Container, user) -> bool: def has_all_dags_access(self, user) -> bool: """ - Has all the dag access in any of the 3 cases: + Has all the dag access in any of the 3 cases. + 1. Role needs to be in (Admin, Viewer, User, Op). 2. Has can_read action on dags resource. 3. Has can_edit action on dags resource. @@ -529,6 +530,7 @@ def clean_perms(self) -> None: def _merge_perm(self, action_name: str, resource_name: str) -> None: """ Add the new (action, resource) to assoc_permission_role if it doesn't exist. + It will add the related entry to ab_permission and ab_resource two meta tables as well. :param action_name: Name of the action @@ -572,6 +574,8 @@ def get_all_permissions(self) -> set[tuple[str, str]]: def _get_all_non_dag_permissions(self) -> dict[tuple[str, str], Permission]: """ + Get permissions except those that are for specific DAGs. + Returns a dict with a key of (action_name, resource_name) and value of permission with all permissions except those that are for specific DAGs. """ @@ -598,6 +602,8 @@ def _get_all_roles_with_permissions(self) -> dict[str, Role]: def create_dag_specific_permissions(self) -> None: """ + Add permissions to all DAGs. + Creates 'can_read', 'can_edit', and 'can_delete' permissions for all DAGs, along with any `access_control` permissions provided in them. @@ -623,7 +629,9 @@ def create_dag_specific_permissions(self) -> None: def update_admin_permission(self) -> None: """ - Admin should have all the permissions, except the dag permissions. + Add missing permissions to the table for admin. + + Admin should get all the permissions, except the dag permissions because Admin already has Dags permission. Add the missing ones to the table for admin. @@ -645,6 +653,8 @@ def update_admin_permission(self) -> None: def sync_roles(self) -> None: """ + Initialize default and custom roles with related permissions. + 1. 
Init the default role(Admin, Viewer, User, Op, public) with related permissions. 2. Init the custom role(dag-user) with related permissions. @@ -677,8 +687,9 @@ def sync_perm_for_dag( access_control: dict[str, Collection[str]] | None = None, ) -> None: """ - Sync permissions for given dag id. The dag id surely exists in our dag bag - as only / refresh button or DagBag will call this function. + Sync permissions for given dag id. + + The dag id surely exists in our dag bag as only / refresh button or DagBag will call this function. :param dag_id: the ID of the DAG whose permissions should be updated :param access_control: a dict where each key is a rolename and diff --git a/airflow/www/utils.py b/airflow/www/utils.py index 6d5f4d74205f..7541f6445e58 100644 --- a/airflow/www/utils.py +++ b/airflow/www/utils.py @@ -251,11 +251,12 @@ def generate_pages( sorting_direction=None, ): """ - Generates the HTML for a paging component using a similar logic to the paging - auto-generated by Flask managed views. The paging component defines a number of - pages visible in the pager (window) and once the user goes to a page beyond the - largest visible, it would scroll to the right the page numbers and keeps the - current one in the middle of the pager component. When in the last pages, + Generates the HTML for a paging component. + + Uses a similar logic to the paging auto-generated by Flask managed views. The paging + component defines a number of pages visible in the pager (window) and once the user + goes to a page beyond the largest visible, it would scroll to the right the page numbers + and keeps the current one in the middle of the pager component. When in the last pages, the pages won't scroll and just keep moving until the last page. Pager also contains pages. This component takes into account custom parameters such as search, status, and tags @@ -646,10 +647,11 @@ def get_attr_renderer(): def get_chart_height(dag): """ - We use the number of tasks in the DAG as a heuristic to - approximate the size of generated chart (otherwise the charts are tiny and unreadable - when DAGs have a large number of tasks). Ideally nvd3 should allow for dynamic-height - charts, that is charts that take up space based on the size of the components within. + Use the number of tasks in the DAG to approximate the size of generated chart. + + Without this the charts are tiny and unreadable when DAGs have a large number of tasks). + Ideally nvd3 should allow for dynamic-height charts, that is charts that take up space + based on the size of the components within. TODO(aoen): See [AIRFLOW-1263]. """ return 600 + len(dag.tasks) * 10 @@ -787,10 +789,9 @@ def __init__(self, datamodel): class CustomSQLAInterface(SQLAInterface): """ - FAB does not know how to handle columns with leading underscores because - they are not supported by WTForm. This hack will remove the leading - '_' from the key to lookup the column names. + FAB does not know how to handle columns with leading underscores because they are not supported by WTForm. + This hack will remove the leading '_' from the key to lookup the column names. 
""" def __init__(self, obj, session: Session | None = None): diff --git a/airflow/www/views.py b/airflow/www/views.py index eee5788275be..31a768abf2bc 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -289,8 +289,9 @@ def node_dict(node_id, label, node_class): def dag_to_grid(dag: DagModel, dag_runs: Sequence[DagRun], session: Session): """ - Create a nested dict representation of the DAG's TaskGroup and its children - used to construct the Graph and Grid views. + Create a nested dict representation of the DAG's TaskGroup and its children. + + Used to construct the Graph and Grid views. """ query = session.execute( select( @@ -593,8 +594,9 @@ def get_task_stats_from_query(qry): def redirect_or_json(origin, msg, status="", status_code=200): """ - Some endpoints are called by javascript, - returning json will allow us to more elegantly handle side-effects in-page. + Returning json will allow us to more elegantly handle side effects in-page. + + This is useful because some endpoints are called by javascript. """ if request.headers.get("Accept") == "application/json": if status == "error" and status_code == 200: @@ -705,8 +707,9 @@ class Airflow(AirflowBaseView): @expose("/health") def health(self): """ - An endpoint helping check the health status of the Airflow instance, - including metadatabase, scheduler and triggerer. + An endpoint helping check the health status of the Airflow instance. + + Includes metadatabase, scheduler and triggerer. """ airflow_health_status = get_airflow_health() @@ -3873,8 +3876,10 @@ def dataset_dependencies(self): @expose("/object/datasets_summary") @auth.has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_DATASET)]) def datasets_summary(self): - """Get a summary of datasets, including the datetime they were last updated and how many updates - they've ever had. + """ + Get a summary of datasets. + + Includes the datetime they were last updated and how many updates they've ever had. """ allowed_attrs = ["uri", "last_dataset_update"] @@ -3968,9 +3973,11 @@ def datasets_summary(self): @action_logging def robots(self): """ - Returns a robots.txt file for blocking certain search engine crawlers. This mitigates some - of the risk associated with exposing Airflow to the public internet, however it does not - address the real security risks associated with such a deployment. + Returns a robots.txt file for blocking certain search engine crawlers. + + This mitigates some of the risk associated with exposing Airflow to the public + internet, however it does not address the real security risks associated with + such a deployment. """ return send_from_directory(get_airflow_app().static_folder, "robots.txt") @@ -4154,7 +4161,8 @@ def apply(self, query, func): class AirflowModelView(ModelView): - """Airflow Mode View. + """ + Airflow Mode View. Overridden `__getattribute__` to wraps REST methods with action_logger """ @@ -4165,7 +4173,9 @@ class AirflowModelView(ModelView): CustomSQLAInterface = wwwutils.CustomSQLAInterface def __getattribute__(self, attr): - """Wraps action REST methods with `action_logging` wrapper + """ + Wraps action REST methods with `action_logging` wrapper. + Overriding enables differentiating resource and generation of event name at the decorator level. 
if attr in ["show", "list", "read", "get", "get_list"]: @@ -4187,8 +4197,9 @@ def __getattribute__(self, attr): class AirflowPrivilegeVerifierModelView(AirflowModelView): """ - This ModelView prevents ability to pass primary keys of objects relating to DAGs you shouldn't be able to - edit. This only holds for the add, update and delete operations. + Prevents ability to pass primary keys of objects relating to DAGs you shouldn't be able to edit. + + This only holds for the add, update and delete operations. You will still need to use the `action_has_dag_edit_access()` for actions. """ @@ -5924,10 +5935,9 @@ def _calculate_graph(self): def add_user_permissions_to_dag(sender, template, context, **extra): """ - Adds `.can_edit`, `.can_trigger`, and `.can_delete` properties - to DAG based on current user's permissions. - Located in `views.py` rather than the DAG model to keep - permissions logic out of the Airflow core. + Adds `.can_edit`, `.can_trigger`, and `.can_delete` properties to DAG based on current user's permissions. + + Located in `views.py` rather than the DAG model to keep permissions logic out of the Airflow core. """ if "dag" not in context: return From 601d0e41cc5edd91b6395115eadbd4254755beea Mon Sep 17 00:00:00 2001 From: "D. Ferruzzi" Date: Sun, 13 Aug 2023 16:19:21 -0700 Subject: [PATCH 034/117] D401 Support - Secrets to Triggers (Inclusive) (#33338) (cherry picked from commit 44a752aa96d5aa4263f4ac1725642084fd612a48) --- airflow/secrets/cache.py | 18 ++++++++++------- airflow/secrets/local_filesystem.py | 6 +++--- airflow/security/permissions.py | 2 +- airflow/security/utils.py | 6 +++--- airflow/sensors/base.py | 4 ++-- airflow/sensors/date_time.py | 4 ++-- airflow/sensors/external_task.py | 4 ++-- airflow/sensors/time_delta.py | 2 +- airflow/sensors/time_sensor.py | 2 +- airflow/serialization/serde.py | 4 ++-- airflow/serialization/serialized_objects.py | 20 ++++++++++--------- airflow/task/task_runner/base_task_runner.py | 2 +- airflow/template/templater.py | 5 +++-- airflow/ti_deps/dep_context.py | 2 +- airflow/ti_deps/deps/base_ti_dep.py | 6 +++--- airflow/ti_deps/deps/dagrun_backfill_dep.py | 2 +- .../ti_deps/deps/pool_slots_available_dep.py | 2 +- airflow/ti_deps/deps/ready_to_reschedule.py | 2 +- airflow/timetables/interval.py | 3 ++- airflow/triggers/base.py | 4 ++-- airflow/triggers/external_task.py | 18 ++++++++--------- airflow/triggers/file.py | 2 +- 22 files changed, 63 insertions(+), 57 deletions(-) diff --git a/airflow/secrets/cache.py b/airflow/secrets/cache.py index c4f4ec506565..25b315efcd12 100644 --- a/airflow/secrets/cache.py +++ b/airflow/secrets/cache.py @@ -46,7 +46,11 @@ def is_expired(self, ttl: datetime.timedelta) -> bool: @classmethod def init(cls): - """Initializes the cache, provided the configuration allows it. Safe to call several times.""" + """ + Initialize the cache, provided the configuration allows it. + + Safe to call several times. + """ if cls._cache is not None: return use_cache = conf.getboolean(section="secrets", key="use_cache", fallback=False) @@ -62,13 +66,13 @@ def init(cls): @classmethod def reset(cls): - """For test purposes only.""" + """Use for test purposes only.""" cls._cache = None @classmethod def get_variable(cls, key: str) -> str | None: """ - Tries to get the value associated with the key from the cache. + Try to get the value associated with the key from the cache. :return: The saved value (which can be None) if present in cache and not expired, a NotPresent exception otherwise. 
@@ -78,7 +82,7 @@ def get_variable(cls, key: str) -> str | None: @classmethod def get_connection_uri(cls, conn_id: str) -> str: """ - Tries to get the uri associated with the conn_id from the cache. + Try to get the uri associated with the conn_id from the cache. :return: The saved uri if present in cache and not expired, a NotPresent exception otherwise. @@ -101,12 +105,12 @@ def _get(cls, key: str, prefix: str) -> str | None: @classmethod def save_variable(cls, key: str, value: str | None): - """Saves the value for that key in the cache, if initialized.""" + """Save the value for that key in the cache, if initialized.""" cls._save(key, value, cls._VARIABLE_PREFIX) @classmethod def save_connection_uri(cls, conn_id: str, uri: str): - """Saves the uri representation for that connection in the cache, if initialized.""" + """Save the uri representation for that connection in the cache, if initialized.""" if uri is None: # connections raise exceptions if not present, so we shouldn't have any None value to save. return @@ -119,7 +123,7 @@ def _save(cls, key: str, value: str | None, prefix: str): @classmethod def invalidate_variable(cls, key: str): - """Invalidates (actually removes) the value stored in the cache for that Variable.""" + """Invalidate (actually removes) the value stored in the cache for that Variable.""" if cls._cache is not None: # second arg ensures no exception if key is absent cls._cache.pop(f"{cls._VARIABLE_PREFIX}{key}", None) diff --git a/airflow/secrets/local_filesystem.py b/airflow/secrets/local_filesystem.py index 7490c8e25c42..742b5c458906 100644 --- a/airflow/secrets/local_filesystem.py +++ b/airflow/secrets/local_filesystem.py @@ -46,7 +46,7 @@ def get_connection_parameter_names() -> set[str]: - """Returns :class:`airflow.models.connection.Connection` constructor parameters.""" + """Return :class:`airflow.models.connection.Connection` constructor parameters.""" from airflow.models.connection import Connection return {k for k in signature(Connection.__init__).parameters.keys() if k != "self"} @@ -186,7 +186,7 @@ def _parse_secret_file(file_path: str) -> dict[str, Any]: def _create_connection(conn_id: str, value: Any): - """Creates a connection based on a URL or JSON object.""" + """Create a connection based on a URL or JSON object.""" from airflow.models.connection import Connection if isinstance(value, str): @@ -243,7 +243,7 @@ def load_variables(file_path: str) -> dict[str, str]: def load_connections(file_path) -> dict[str, list[Any]]: - """Deprecated: Please use `airflow.secrets.local_filesystem.load_connections_dict`.""" + """Use `airflow.secrets.local_filesystem.load_connections_dict`, this is deprecated.""" warnings.warn( "This function is deprecated. Please use `airflow.secrets.local_filesystem.load_connections_dict`.", RemovedInAirflow3Warning, diff --git a/airflow/security/permissions.py b/airflow/security/permissions.py index 3259b48dc354..a5c862c1701b 100644 --- a/airflow/security/permissions.py +++ b/airflow/security/permissions.py @@ -70,7 +70,7 @@ def resource_name_for_dag(root_dag_id: str) -> str: - """Returns the resource name for a DAG id. + """Return the resource name for a DAG id. 
Note that since a sub-DAG should follow the permission of its parent DAG, you should pass ``DagModel.root_dag_id`` to this function, diff --git a/airflow/security/utils.py b/airflow/security/utils.py index 139e96a13c92..9ad7fc10e248 100644 --- a/airflow/security/utils.py +++ b/airflow/security/utils.py @@ -54,7 +54,7 @@ def get_components(principal) -> list[str] | None: def replace_hostname_pattern(components, host=None): - """Replaces hostname with the right pattern including lowercase of the name.""" + """Replace hostname with the right pattern including lowercase of the name.""" fqdn = host if not fqdn or fqdn == "0.0.0.0": fqdn = get_hostname() @@ -62,7 +62,7 @@ def replace_hostname_pattern(components, host=None): def get_fqdn(hostname_or_ip=None): - """Retrieves FQDN - hostname for the IP or hostname.""" + """Retrieve FQDN - hostname for the IP or hostname.""" try: if hostname_or_ip: fqdn = socket.gethostbyaddr(hostname_or_ip)[0] @@ -77,7 +77,7 @@ def get_fqdn(hostname_or_ip=None): def principal_from_username(username, realm): - """Retrieves principal from the user name and realm.""" + """Retrieve principal from the username and realm.""" if ("@" not in username) and realm: username = f"{username}@{realm}" diff --git a/airflow/sensors/base.py b/airflow/sensors/base.py index e23894a28de1..792d907d1f50 100644 --- a/airflow/sensors/base.py +++ b/airflow/sensors/base.py @@ -199,7 +199,7 @@ def _validate_input_values(self) -> None: ) def poke(self, context: Context) -> bool | PokeReturnValue: - """Function defined by the sensors while deriving this class should override.""" + """Override when deriving this class.""" raise AirflowException("Override me.") def execute(self, context: Context) -> Any: @@ -287,7 +287,7 @@ def _get_next_poke_interval( run_duration: Callable[[], float], try_number: int, ) -> float: - """Using the similar logic which is used for exponential backoff retry delay for operators.""" + """Use similar logic which is used for exponential backoff retry delay for operators.""" if not self.exponential_backoff: return self.poke_interval diff --git a/airflow/sensors/date_time.py b/airflow/sensors/date_time.py index 19168e98f3ec..142502887066 100644 --- a/airflow/sensors/date_time.py +++ b/airflow/sensors/date_time.py @@ -77,7 +77,7 @@ def poke(self, context: Context) -> bool: class DateTimeSensorAsync(DateTimeSensor): """ - Waits until the specified datetime occurs. + Wait until the specified datetime occurs. Deferring itself to avoid taking up a worker slot while it is waiting. It is a drop-in replacement for DateTimeSensor. 
@@ -92,5 +92,5 @@ def execute(self, context: Context): ) def execute_complete(self, context, event=None): - """Callback for when the trigger fires - returns immediately.""" + """Execute when the trigger fires - returns immediately.""" return None diff --git a/airflow/sensors/external_task.py b/airflow/sensors/external_task.py index 9e48d3e1406e..53270ac411fa 100644 --- a/airflow/sensors/external_task.py +++ b/airflow/sensors/external_task.py @@ -347,7 +347,7 @@ def execute(self, context: Context) -> None: ) def execute_complete(self, context, event=None): - """Callback for when the trigger fires - returns immediately.""" + """Execute when the trigger fires - return immediately.""" if event["status"] == "success": self.log.info("External task %s has executed successfully.", self.external_task_id) return None @@ -528,7 +528,7 @@ def __init__( @classmethod def get_serialized_fields(cls): - """Serialized ExternalTaskMarker contain exactly these fields + templated_fields .""" + """Serialize ExternalTaskMarker to contain exactly these fields + templated_fields .""" if not cls.__serialized_fields: cls.__serialized_fields = frozenset(super().get_serialized_fields() | {"recursion_depth"}) return cls.__serialized_fields diff --git a/airflow/sensors/time_delta.py b/airflow/sensors/time_delta.py index a73d123c3d76..1571334757af 100644 --- a/airflow/sensors/time_delta.py +++ b/airflow/sensors/time_delta.py @@ -67,5 +67,5 @@ def execute(self, context: Context): self.defer(trigger=DateTimeTrigger(moment=target_dttm), method_name="execute_complete") def execute_complete(self, context, event=None): - """Callback for when the trigger fires - returns immediately.""" + """Execute for when the trigger fires - return immediately.""" return None diff --git a/airflow/sensors/time_sensor.py b/airflow/sensors/time_sensor.py index 12b26d06bdd5..7f6809851a71 100644 --- a/airflow/sensors/time_sensor.py +++ b/airflow/sensors/time_sensor.py @@ -76,5 +76,5 @@ def execute(self, context: Context): ) def execute_complete(self, context, event=None): - """Callback for when the trigger fires - returns immediately.""" + """Execute when the trigger fires - returns immediately.""" return None diff --git a/airflow/serialization/serde.py b/airflow/serialization/serde.py index a9a09d86db53..5e5908df900d 100644 --- a/airflow/serialization/serde.py +++ b/airflow/serialization/serde.py @@ -65,7 +65,7 @@ def encode(cls: str, version: int, data: T) -> dict[str, str | int | T]: - """Encodes o so it can be understood by the deserializer.""" + """Encode an object so it can be understood by the deserializer.""" return {CLASSNAME: cls, VERSION: version, DATA: data} @@ -274,7 +274,7 @@ def deserialize(o: T | None, full=True, type_hint: Any = None) -> object: def _convert(old: dict) -> dict: - """Converts an old style serialization to new style.""" + """Convert an old style serialization to new style.""" if OLD_TYPE in old and OLD_DATA in old: # Return old style dicts directly as they do not need wrapping if old[OLD_TYPE] == OLD_DICT: diff --git a/airflow/serialization/serialized_objects.py b/airflow/serialization/serialized_objects.py index 8147dbee43a4..67d08b7a94fd 100644 --- a/airflow/serialization/serialized_objects.py +++ b/airflow/serialization/serialized_objects.py @@ -365,7 +365,7 @@ def _is_excluded(cls, var: Any, attrname: str, instance: Any) -> bool: def serialize_to_json( cls, object_to_serialize: BaseOperator | MappedOperator | DAG, decorated_fields: set ) -> dict[str, Any]: - """Serializes an object to JSON.""" + """Serialize an 
object to JSON.""" serialized_object: dict[str, Any] = {} keys_to_serialize = object_to_serialize.get_serialized_fields() for key in keys_to_serialize: @@ -395,7 +395,8 @@ def serialize_to_json( def serialize( cls, var: Any, *, strict: bool = False, use_pydantic_models: bool = False ) -> Any: # Unfortunately there is no support for recursive types in mypy - """Helper function of depth first search for serialization. + """ + Serialize an object; helper function of depth first search for serialization. The serialization protocol is: @@ -513,7 +514,8 @@ def default_serialization(cls, strict, var) -> str: @classmethod def deserialize(cls, encoded_var: Any, use_pydantic_models=False) -> Any: - """Helper function of depth first search for deserialization. + """ + Deserialize an object; helper function of depth first search for deserialization. :meta private: """ @@ -695,7 +697,7 @@ class DependencyDetector: @staticmethod def detect_task_dependencies(task: Operator) -> list[DagDependency]: - """Detects dependencies caused by tasks.""" + """Detect dependencies caused by tasks.""" from airflow.operators.trigger_dagrun import TriggerDagRunOperator from airflow.sensors.external_task import ExternalTaskSensor @@ -732,7 +734,7 @@ def detect_task_dependencies(task: Operator) -> list[DagDependency]: @staticmethod def detect_dag_dependencies(dag: DAG | None) -> Iterable[DagDependency]: - """Detects dependencies set directly on the DAG object.""" + """Detect dependencies set directly on the DAG object.""" if not dag: return for x in dag.dataset_triggers: @@ -831,7 +833,7 @@ def serialize_operator(cls, op: BaseOperator) -> dict[str, Any]: @classmethod def _serialize_node(cls, op: BaseOperator | MappedOperator, include_deps: bool) -> dict[str, Any]: - """Serializes operator into a JSON object.""" + """Serialize operator into a JSON object.""" serialize_op = cls.serialize_to_json(op, cls._decorated_fields) serialize_op["_task_type"] = getattr(op, "_task_type", type(op).__name__) serialize_op["_task_module"] = getattr(op, "_task_module", type(op).__module__) @@ -1079,7 +1081,7 @@ def deserialize_operator(cls, encoded_op: dict[str, Any]) -> Operator: @classmethod def detect_dependencies(cls, op: Operator) -> set[DagDependency]: - """Detects between DAG dependencies for the operator.""" + """Detect between DAG dependencies for the operator.""" def get_custom_dep() -> list[DagDependency]: """ @@ -1275,7 +1277,7 @@ def __get_constructor_defaults(): @classmethod def serialize_dag(cls, dag: DAG) -> dict: - """Serializes a DAG into a JSON object.""" + """Serialize a DAG into a JSON object.""" try: serialized_dag = cls.serialize_to_json(dag, cls._decorated_fields) @@ -1409,7 +1411,7 @@ class TaskGroupSerialization(BaseSerialization): @classmethod def serialize_task_group(cls, task_group: TaskGroup) -> dict[str, Any] | None: - """Serializes TaskGroup into a JSON object.""" + """Serialize TaskGroup into a JSON object.""" if not task_group: return None diff --git a/airflow/task/task_runner/base_task_runner.py b/airflow/task/task_runner/base_task_runner.py index 64523b17c5ae..c1045280dca6 100644 --- a/airflow/task/task_runner/base_task_runner.py +++ b/airflow/task/task_runner/base_task_runner.py @@ -178,7 +178,7 @@ def terminate(self) -> None: raise NotImplementedError() def on_finish(self) -> None: - """A callback that should be called when this is done running.""" + """Execute when this is done running.""" if self._cfg_path and os.path.isfile(self._cfg_path): if self.run_as_user: subprocess.call(["sudo", "rm", 
self._cfg_path], close_fds=True) diff --git a/airflow/template/templater.py b/airflow/template/templater.py index 29952b83b02e..19eb3f59965f 100644 --- a/airflow/template/templater.py +++ b/airflow/template/templater.py @@ -56,14 +56,15 @@ def get_template_env(self, dag: DAG | None = None) -> jinja2.Environment: return SandboxedEnvironment(cache_size=0) def prepare_template(self) -> None: - """Hook triggered after the templated fields get replaced by their content. + """ + Execute after the templated fields get replaced by their content. If you need your object to alter the content of the file before the template is rendered, it should override this method to do so. """ def resolve_template_files(self) -> None: - """Getting the content of files for template_field / template_ext.""" + """Get the content of files for template_field / template_ext.""" if self.template_ext: for field in self.template_fields: content = getattr(self, field, None) diff --git a/airflow/ti_deps/dep_context.py b/airflow/ti_deps/dep_context.py index 5a19c5bfc793..bd4c0db46198 100644 --- a/airflow/ti_deps/dep_context.py +++ b/airflow/ti_deps/dep_context.py @@ -85,7 +85,7 @@ class DepContext: def ensure_finished_tis(self, dag_run: DagRun, session: Session) -> list[TaskInstance]: """ - Ensures finished_tis is populated if it's currently None, which allows running tasks without dag_run. + Ensure finished_tis is populated if it's currently None, which allows running tasks without dag_run. :param dag_run: The DagRun for which to find finished tasks :return: A list of all the finished tasks of this DAG and execution_date diff --git a/airflow/ti_deps/deps/base_ti_dep.py b/airflow/ti_deps/deps/base_ti_dep.py index b4b34ae44422..ef18f3fcdf48 100644 --- a/airflow/ti_deps/deps/base_ti_dep.py +++ b/airflow/ti_deps/deps/base_ti_dep.py @@ -90,7 +90,7 @@ def get_dep_statuses( dep_context: DepContext | None = None, ) -> Iterator[TIDepStatus]: """ - Wrapper around the private _get_dep_statuses method. + Wrap around the private _get_dep_statuses method. Contains some global checks for all dependencies. @@ -113,7 +113,7 @@ def get_dep_statuses( @provide_session def is_met(self, ti: TaskInstance, session: Session, dep_context: DepContext | None = None) -> bool: """ - Returns whether a dependency is met for a given task instance. + Return whether a dependency is met for a given task instance. A dependency is considered met if all the dependency statuses it reports are passing. @@ -132,7 +132,7 @@ def get_failure_reasons( dep_context: DepContext | None = None, ) -> Iterator[str]: """ - Returns an iterable of strings that explain why this dependency wasn't met. + Return an iterable of strings that explain why this dependency wasn't met. :param ti: the task instance to see if this dependency is met for :param session: database session diff --git a/airflow/ti_deps/deps/dagrun_backfill_dep.py b/airflow/ti_deps/deps/dagrun_backfill_dep.py index d33e1af593a6..6d0a1efad262 100644 --- a/airflow/ti_deps/deps/dagrun_backfill_dep.py +++ b/airflow/ti_deps/deps/dagrun_backfill_dep.py @@ -32,7 +32,7 @@ class DagRunNotBackfillDep(BaseTIDep): @provide_session def _get_dep_statuses(self, ti, session, dep_context=None): """ - Determines if the DagRun is valid for scheduling from scheduler. + Determine if the DagRun is valid for scheduling from scheduler. 
:param ti: the task instance to get the dependency status for :param session: database session diff --git a/airflow/ti_deps/deps/pool_slots_available_dep.py b/airflow/ti_deps/deps/pool_slots_available_dep.py index e095cfb68e08..6263dc2ed1ec 100644 --- a/airflow/ti_deps/deps/pool_slots_available_dep.py +++ b/airflow/ti_deps/deps/pool_slots_available_dep.py @@ -31,7 +31,7 @@ class PoolSlotsAvailableDep(BaseTIDep): @provide_session def _get_dep_statuses(self, ti, session, dep_context=None): """ - Determines if the pool task instance is in has available slots. + Determine if the pool task instance is in has available slots. :param ti: the task instance to get the dependency status for :param session: database session diff --git a/airflow/ti_deps/deps/ready_to_reschedule.py b/airflow/ti_deps/deps/ready_to_reschedule.py index 4fca6f5538e8..0eaa52c1eb99 100644 --- a/airflow/ti_deps/deps/ready_to_reschedule.py +++ b/airflow/ti_deps/deps/ready_to_reschedule.py @@ -36,7 +36,7 @@ class ReadyToRescheduleDep(BaseTIDep): @provide_session def _get_dep_statuses(self, ti, session, dep_context): """ - Determines whether a task is ready to be rescheduled. + Determine whether a task is ready to be rescheduled. Only tasks in NONE state with at least one row in task_reschedule table are handled by this dependency class, otherwise this dependency is considered as passed. diff --git a/airflow/timetables/interval.py b/airflow/timetables/interval.py index 50478c6f7551..27e128ff5280 100644 --- a/airflow/timetables/interval.py +++ b/airflow/timetables/interval.py @@ -185,7 +185,8 @@ def deserialize(cls, data: dict[str, Any]) -> Timetable: return cls(datetime.timedelta(seconds=delta)) def __eq__(self, other: Any) -> bool: - """The offset should match. + """ + Return if the offsets match. This is only for testing purposes and should not be relied on otherwise. """ diff --git a/airflow/triggers/base.py b/airflow/triggers/base.py index 87dd12f317b8..4612c4edbf12 100644 --- a/airflow/triggers/base.py +++ b/airflow/triggers/base.py @@ -50,7 +50,7 @@ def _set_context(self, context): @abc.abstractmethod def serialize(self) -> tuple[str, dict[str, Any]]: """ - Returns the information needed to reconstruct this Trigger. + Return the information needed to reconstruct this Trigger. :return: Tuple of (class path, keyword arguments needed to re-instantiate). """ @@ -59,7 +59,7 @@ def serialize(self) -> tuple[str, dict[str, Any]]: @abc.abstractmethod async def run(self) -> AsyncIterator[TriggerEvent]: """ - Runs the trigger in an asynchronous context. + Run the trigger in an asynchronous context. The trigger should yield an Event whenever it wants to fire off an event, and return None if it is finished. Single-event triggers diff --git a/airflow/triggers/external_task.py b/airflow/triggers/external_task.py index f179cba259ca..74742fcccb26 100644 --- a/airflow/triggers/external_task.py +++ b/airflow/triggers/external_task.py @@ -69,7 +69,7 @@ def __init__( self._timeout_sec = 60 def serialize(self) -> tuple[str, dict[str, typing.Any]]: - """Serializes TaskStateTrigger arguments and classpath.""" + """Serialize TaskStateTrigger arguments and classpath.""" return ( "airflow.triggers.external_task.TaskStateTrigger", { @@ -84,10 +84,11 @@ def serialize(self) -> tuple[str, dict[str, typing.Any]]: async def run(self) -> typing.AsyncIterator[TriggerEvent]: """ - Checks periodically in the database to see if the dag exists and is in the running state. If found, - wait until the task specified will reach one of the expected states. 
If dag with specified name was - not in the running state after _timeout_sec seconds after starting execution process of the trigger, - terminate with status 'timeout'. + Check periodically in the database to see if the dag exists and is in the running state. + + If found, wait until the task specified will reach one of the expected states. + If dag with specified name was not in the running state after _timeout_sec seconds + after starting execution process of the trigger, terminate with status 'timeout'. """ while True: try: @@ -167,7 +168,7 @@ def __init__( self.poll_interval = poll_interval def serialize(self) -> tuple[str, dict[str, typing.Any]]: - """Serializes DagStateTrigger arguments and classpath.""" + """Serialize DagStateTrigger arguments and classpath.""" return ( "airflow.triggers.external_task.DagStateTrigger", { @@ -179,10 +180,7 @@ def serialize(self) -> tuple[str, dict[str, typing.Any]]: ) async def run(self) -> typing.AsyncIterator[TriggerEvent]: - """ - Checks periodically in the database to see if the dag run exists, and has - hit one of the states yet, or not. - """ + """Check the database to see if the dag run exists, and has hit one of the states yet, or not.""" while True: # mypy confuses typing here num_dags = await self.count_dags() # type: ignore[call-arg] diff --git a/airflow/triggers/file.py b/airflow/triggers/file.py index 4128b09814b4..12b2d0e8272e 100644 --- a/airflow/triggers/file.py +++ b/airflow/triggers/file.py @@ -48,7 +48,7 @@ def __init__( self.poll_interval = poll_interval def serialize(self) -> tuple[str, dict[str, Any]]: - """Serializes FileTrigger arguments and classpath.""" + """Serialize FileTrigger arguments and classpath.""" return ( "airflow.triggers.file.FileTrigger", { From 4af3afd56773d534b16a862f5c510364b8bda2ee Mon Sep 17 00:00:00 2001 From: "D. Ferruzzi" Date: Mon, 14 Aug 2023 20:32:49 -0700 Subject: [PATCH 035/117] D205 Support - Stragglers (#33301) (cherry picked from commit 9bf68adfa0d929c4400d3e9f02cd2feaa4932f59) --- airflow/kubernetes/pre_7_4_0_compatibility/pod_generator.py | 5 +++-- airflow/operators/python.py | 3 ++- airflow/plugins_manager.py | 1 + airflow/providers/papermill/operators/papermill.py | 6 +++--- airflow/providers_manager.py | 3 +++ airflow/sensors/external_task.py | 5 +---- airflow/triggers/external_task.py | 2 +- airflow/utils/hashlib_wrapper.py | 3 +-- 8 files changed, 15 insertions(+), 13 deletions(-) diff --git a/airflow/kubernetes/pre_7_4_0_compatibility/pod_generator.py b/airflow/kubernetes/pre_7_4_0_compatibility/pod_generator.py index aaacc8ce45fb..9154e83fde86 100644 --- a/airflow/kubernetes/pre_7_4_0_compatibility/pod_generator.py +++ b/airflow/kubernetes/pre_7_4_0_compatibility/pod_generator.py @@ -15,8 +15,9 @@ # specific language governing permissions and limitations # under the License. """ -Pod generator compatible with cncf-providers released before 2.7.0 of airflow (so pre-7.4.0 of -the cncf.kubernetes provider). +Pod generator compatible with cncf-providers released before 2.7.0 of airflow. + +Compatible with pre-7.4.0 of the cncf.kubernetes provider. This module provides an interface between the previous Pod API and outputs a kubernetes.client.models.V1Pod. 
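Note: the D205 hunks in this patch all enforce the same pydocstyle conventions: D205 requires a one-line summary separated from the rest of the docstring by a blank line, and D401 (applied in the neighbouring patches) requires that summary to be written in the imperative mood. A minimal before/after sketch of the pattern, with hypothetical function names not taken from the Airflow tree:

def load_config_before(path):
    """Loads the configuration file and falls back to built-in defaults when the
    file is missing."""  # fails D205 (no blank line after the summary) and D401 (not imperative)


def load_config_after(path):
    """Load the configuration file.

    Fall back to built-in defaults when the file is missing.
    """  # one-line imperative summary, blank line, then the longer description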
diff --git a/airflow/operators/python.py b/airflow/operators/python.py index 36ae81650978..b3627a48ba19 100644 --- a/airflow/operators/python.py +++ b/airflow/operators/python.py @@ -780,7 +780,8 @@ def _get_airflow_version_from_target_env(self) -> str | None: class BranchExternalPythonOperator(ExternalPythonOperator, SkipMixin): """ - A workflow can "branch" or follow a path after the execution of this task, + A workflow can "branch" or follow a path after the execution of this task. + Extends ExternalPythonOperator, so expects to get Python: virtualenv that should be used (in ``VENV/bin`` folder). Should be absolute path, so it can run on separate virtualenv similarly to ExternalPythonOperator. diff --git a/airflow/plugins_manager.py b/airflow/plugins_manager.py index 17acdd245c86..1b117101d7f6 100644 --- a/airflow/plugins_manager.py +++ b/airflow/plugins_manager.py @@ -206,6 +206,7 @@ def is_valid_plugin(plugin_obj): def register_plugin(plugin_instance): """ Start plugin load and register it after success initialization. + If plugin is already registered, do nothing. :param plugin_instance: subclass of AirflowPlugin diff --git a/airflow/providers/papermill/operators/papermill.py b/airflow/providers/papermill/operators/papermill.py index 9fe281503f32..e216a5e068d1 100644 --- a/airflow/providers/papermill/operators/papermill.py +++ b/airflow/providers/papermill/operators/papermill.py @@ -17,7 +17,7 @@ # under the License. from __future__ import annotations -from typing import TYPE_CHECKING, ClassVar, Collection, Optional, Sequence +from typing import TYPE_CHECKING, ClassVar, Collection, Sequence import attr import papermill as pm @@ -42,8 +42,8 @@ class NoteBook(File): *(File.template_fields if hasattr(File, "template_fields") else {"url"}), } - type_hint: Optional[str] = "jupyter_notebook" # noqa: UP007 - parameters: Optional[dict] = {} # noqa: UP007 + type_hint: str | None = "jupyter_notebook" + parameters: dict | None = {} meta_schema: str = __name__ + ".NoteBook" diff --git a/airflow/providers_manager.py b/airflow/providers_manager.py index b7689a92da61..febc42a592d9 100644 --- a/airflow/providers_manager.py +++ b/airflow/providers_manager.py @@ -298,6 +298,7 @@ def _correctness_check( ) -> type[BaseHook] | None: """ Performs coherence check on provider classes. + For apache-airflow providers - it checks if it starts with appropriate package. For all providers it tries to import the provider - checking that there are no exceptions during importing. It logs appropriate warning in case it detects any problems. @@ -568,6 +569,7 @@ def _discover_all_providers_from_packages(self) -> None: def _discover_all_airflow_builtin_providers_from_local_sources(self) -> None: """ Finds all built-in airflow providers if airflow is run from the local sources. + It finds `provider.yaml` files for all such providers and registers the providers using those. This 'provider.yaml' scanning takes precedence over scanning packages installed @@ -1113,6 +1115,7 @@ def extra_links_class_names(self) -> list[str]: def connection_form_widgets(self) -> dict[str, ConnectionFormWidgetInfo]: """ Returns widgets for connection forms. + Dictionary keys in the same order that it defined in Hook. 
""" self.initialize_providers_hooks() diff --git a/airflow/sensors/external_task.py b/airflow/sensors/external_task.py index 53270ac411fa..ffc2cc1313f0 100644 --- a/airflow/sensors/external_task.py +++ b/airflow/sensors/external_task.py @@ -327,10 +327,7 @@ def poke(self, context: Context, session: Session = NEW_SESSION) -> bool: return count_allowed == len(dttm_filter) def execute(self, context: Context) -> None: - """ - Airflow runs this method on the worker and defers using the triggers - if deferrable is set to True. - """ + """Runs on the worker and defers using the triggers if deferrable is set to True.""" if not self.deferrable: super().execute(context) else: diff --git a/airflow/triggers/external_task.py b/airflow/triggers/external_task.py index 74742fcccb26..431d91a15e02 100644 --- a/airflow/triggers/external_task.py +++ b/airflow/triggers/external_task.py @@ -180,7 +180,7 @@ def serialize(self) -> tuple[str, dict[str, typing.Any]]: ) async def run(self) -> typing.AsyncIterator[TriggerEvent]: - """Check the database to see if the dag run exists, and has hit one of the states yet, or not.""" + """Check periodically if the dag run exists, and has hit one of the states yet, or not.""" while True: # mypy confuses typing here num_dags = await self.count_dags() # type: ignore[call-arg] diff --git a/airflow/utils/hashlib_wrapper.py b/airflow/utils/hashlib_wrapper.py index 65a18566b775..1390ada334cd 100644 --- a/airflow/utils/hashlib_wrapper.py +++ b/airflow/utils/hashlib_wrapper.py @@ -28,8 +28,7 @@ def md5(__string: ReadableBuffer = b"") -> hashlib._Hash: """ - Safely allows calling the ``hashlib.md5`` function when ``usedforsecurity`` is disabled in - the configuration. + Safely allows calling the ``hashlib.md5`` function when ``usedforsecurity`` is disabled in configuration. :param __string: The data to hash. Default to empty str byte. :return: The hashed value. From bb6ea0f29809c89d7ed7e0ea2b16f3f050e885c1 Mon Sep 17 00:00:00 2001 From: Pankaj Koti Date: Sat, 12 Aug 2023 04:28:12 +0530 Subject: [PATCH 036/117] Remove the replace to
<br> tag in Provider's view (#33326) (cherry picked from commit 23d542462a1aaa5afcd36dedc3c2a12c840e1d2c) --- airflow/www/views.py | 1 - 1 file changed, 1 deletion(-) diff --git a/airflow/www/views.py index 31a768abf2bc..8838785f4550 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -4872,7 +4872,6 @@ def _build_link(match_obj): cd = escape(description) cd = re2.sub(r"`(.*)[\s+]+<(.*)>`__", _build_link, cd) - cd = re2.sub(r"\n", r"<br>
", cd) return Markup(cd) From 548e3f55b3e9bee69b9505055c1a40b9a941570e Mon Sep 17 00:00:00 2001 From: "D. Ferruzzi" Date: Sat, 12 Aug 2023 02:31:58 -0700 Subject: [PATCH 037/117] D401 Support - Airflow/api thru Airflow/auth (#33333) (cherry picked from commit b657ae914e3783f28be16e7d3acdc86cb3d37e83) --- .../endpoints/role_and_permission_endpoint.py | 2 +- airflow/api_connexion/exceptions.py | 2 +- airflow/api_connexion/parameters.py | 4 ++-- airflow/api_connexion/schemas/dag_schema.py | 6 +++--- airflow/api_connexion/schemas/pool_schema.py | 12 ++++++------ .../api_connexion/schemas/task_instance_schema.py | 4 ++-- airflow/api_connexion/security.py | 4 ++-- airflow/api_internal/endpoints/rpc_api_endpoint.py | 2 +- airflow/api_internal/internal_api_call.py | 6 ++++-- 9 files changed, 22 insertions(+), 20 deletions(-) diff --git a/airflow/api_connexion/endpoints/role_and_permission_endpoint.py b/airflow/api_connexion/endpoints/role_and_permission_endpoint.py index 34b0f5478a57..c14312967108 100644 --- a/airflow/api_connexion/endpoints/role_and_permission_endpoint.py +++ b/airflow/api_connexion/endpoints/role_and_permission_endpoint.py @@ -42,7 +42,7 @@ def _check_action_and_resource(sm: AirflowSecurityManager, perms: list[tuple[str, str]]) -> None: """ - Checks if the action or resource exists and otherwise raise 400. + Check if the action or resource exists and otherwise raise 400. This function is intended for use in the REST API because it raise 400 """ diff --git a/airflow/api_connexion/exceptions.py b/airflow/api_connexion/exceptions.py index 11468e1506fe..bd9dcb4a11cc 100644 --- a/airflow/api_connexion/exceptions.py +++ b/airflow/api_connexion/exceptions.py @@ -39,7 +39,7 @@ def common_error_handler(exception: BaseException) -> flask.Response: - """Used to capture connexion exceptions and add link to the type field.""" + """Use to capture connexion exceptions and add link to the type field.""" if isinstance(exception, ProblemException): link = EXCEPTIONS_LINK_MAP.get(exception.status) diff --git a/airflow/api_connexion/parameters.py b/airflow/api_connexion/parameters.py index f4f55cfecd37..0c778a420996 100644 --- a/airflow/api_connexion/parameters.py +++ b/airflow/api_connexion/parameters.py @@ -33,7 +33,7 @@ def validate_istimezone(value: datetime) -> None: - """Validates that a datetime is not naive.""" + """Validate that a datetime is not naive.""" if not value.tzinfo: raise BadRequest("Invalid datetime format", detail="Naive datetime is disallowed") @@ -85,7 +85,7 @@ def check_limit(value: int) -> int: def format_parameters(params_formatters: dict[str, Callable[[Any], Any]]) -> Callable[[T], T]: """ - Decorator factory that create decorator that convert parameters using given formatters. + Create a decorator to convert parameters using given formatters. Using it allows you to separate parameter formatting from endpoint logic. 
diff --git a/airflow/api_connexion/schemas/dag_schema.py b/airflow/api_connexion/schemas/dag_schema.py index 182bbf180334..08f649b3d699 100644 --- a/airflow/api_connexion/schemas/dag_schema.py +++ b/airflow/api_connexion/schemas/dag_schema.py @@ -117,7 +117,7 @@ def get_concurrency(obj: DAG): @staticmethod def get_tags(obj: DAG): - """Dumps tags as objects.""" + """Dump tags as objects.""" tags = obj.tags if tags: return [DagTagSchema().dump(dict(name=tag)) for tag in tags] @@ -132,12 +132,12 @@ def get_owners(obj: DAG): @staticmethod def get_is_paused(obj: DAG): - """Checks entry in DAG table to see if this DAG is paused.""" + """Check entry in DAG table to see if this DAG is paused.""" return obj.get_is_paused() @staticmethod def get_is_active(obj: DAG): - """Checks entry in DAG table to see if this DAG is active.""" + """Check entry in DAG table to see if this DAG is active.""" return obj.get_is_active() @staticmethod diff --git a/airflow/api_connexion/schemas/pool_schema.py b/airflow/api_connexion/schemas/pool_schema.py index e18548cee844..fd42d71e8957 100644 --- a/airflow/api_connexion/schemas/pool_schema.py +++ b/airflow/api_connexion/schemas/pool_schema.py @@ -46,32 +46,32 @@ class Meta: @staticmethod def get_occupied_slots(obj: Pool) -> int: - """Returns the occupied slots of the pool.""" + """Return the occupied slots of the pool.""" return obj.occupied_slots() @staticmethod def get_running_slots(obj: Pool) -> int: - """Returns the running slots of the pool.""" + """Return the running slots of the pool.""" return obj.running_slots() @staticmethod def get_queued_slots(obj: Pool) -> int: - """Returns the queued slots of the pool.""" + """Return the queued slots of the pool.""" return obj.queued_slots() @staticmethod def get_scheduled_slots(obj: Pool) -> int: - """Returns the scheduled slots of the pool.""" + """Return the scheduled slots of the pool.""" return obj.scheduled_slots() @staticmethod def get_deferred_slots(obj: Pool) -> int: - """Returns the deferred slots of the pool.""" + """Return the deferred slots of the pool.""" return obj.deferred_slots() @staticmethod def get_open_slots(obj: Pool) -> float: - """Returns the open slots of the pool.""" + """Return the open slots of the pool.""" return obj.open_slots() diff --git a/airflow/api_connexion/schemas/task_instance_schema.py b/airflow/api_connexion/schemas/task_instance_schema.py index a3ce7c6a62e4..9aea40e195bf 100644 --- a/airflow/api_connexion/schemas/task_instance_schema.py +++ b/airflow/api_connexion/schemas/task_instance_schema.py @@ -135,7 +135,7 @@ class ClearTaskInstanceFormSchema(Schema): @validates_schema def validate_form(self, data, **kwargs): - """Validates clear task instance form.""" + """Validate clear task instance form.""" if data["only_failed"] and data["only_running"]: raise ValidationError("only_failed and only_running both are set to True") if data["start_date"] and data["end_date"]: @@ -169,7 +169,7 @@ class SetTaskInstanceStateFormSchema(Schema): @validates_schema def validate_form(self, data, **kwargs): - """Validates set task instance state form.""" + """Validate set task instance state form.""" if not exactly_one(data.get("execution_date"), data.get("dag_run_id")): raise ValidationError("Exactly one of execution_date or dag_run_id must be provided") diff --git a/airflow/api_connexion/security.py b/airflow/api_connexion/security.py index 664899359c2b..b108adc2c36b 100644 --- a/airflow/api_connexion/security.py +++ b/airflow/api_connexion/security.py @@ -28,7 +28,7 @@ def check_authentication() -> 
None: - """Checks that the request has valid authorization information.""" + """Check that the request has valid authorization information.""" for auth in get_airflow_app().api_auth: response = auth.requires_authentication(Response)() if response.status_code == 200: @@ -39,7 +39,7 @@ def check_authentication() -> None: def requires_access(permissions: Sequence[tuple[str, str]] | None = None) -> Callable[[T], T]: - """Factory for decorator that checks current user's permissions against required permissions.""" + """Check current user's permissions against required permissions.""" appbuilder = get_airflow_app().appbuilder if appbuilder.update_perms: appbuilder.sm.sync_resource_permissions(permissions) diff --git a/airflow/api_internal/endpoints/rpc_api_endpoint.py b/airflow/api_internal/endpoints/rpc_api_endpoint.py index b6ac604c0539..700408bf35e1 100644 --- a/airflow/api_internal/endpoints/rpc_api_endpoint.py +++ b/airflow/api_internal/endpoints/rpc_api_endpoint.py @@ -65,7 +65,7 @@ def _initialize_map() -> dict[str, Callable]: def internal_airflow_api(body: dict[str, Any]) -> APIResponse: - """Handler for Internal API /internal_api/v1/rpcapi endpoint.""" + """Handle Internal API /internal_api/v1/rpcapi endpoint.""" log.debug("Got request") json_rpc = body.get("jsonrpc") if json_rpc != "2.0": diff --git a/airflow/api_internal/internal_api_call.py b/airflow/api_internal/internal_api_call.py index d0e848e79386..d9c49a138fd5 100644 --- a/airflow/api_internal/internal_api_call.py +++ b/airflow/api_internal/internal_api_call.py @@ -42,7 +42,8 @@ class InternalApiConfig: @staticmethod def force_database_direct_access(): - """Current component will not use Internal API. + """ + Block current component from using Internal API. All methods decorated with internal_api_call will always be executed locally. This mode is needed for "trusted" components like Scheduler, Webserver or Internal Api server. @@ -80,7 +81,8 @@ def _init_values(): def internal_api_call(func: Callable[PS, RT]) -> Callable[PS, RT]: - """Decorator for methods which may be executed in database isolation mode. + """ + Allow methods to be executed in database isolation mode. If [core]database_access_isolation is true then such method are not executed locally, but instead RPC call is made to Database API (aka Internal API). This makes some components From f7e39af87de78eaf4da42a4e396f3e75e7296426 Mon Sep 17 00:00:00 2001 From: "D. 
Ferruzzi" Date: Sat, 12 Aug 2023 07:38:14 -0700 Subject: [PATCH 038/117] Fix some missing type hints (#33334) (cherry picked from commit 3c48dc714b65d2ed5e29c8b29fc58cc4a3afabf8) --- airflow/models/dag.py | 8 ++++---- airflow/settings.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/airflow/models/dag.py b/airflow/models/dag.py index d4ff29db2fd1..f2d9b8e63d43 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -507,7 +507,7 @@ def __init__( tzinfo = None if date.tzinfo else settings.TIMEZONE tz = pendulum.instance(date, tz=tzinfo).timezone - self.timezone = tz or settings.TIMEZONE + self.timezone: Timezone = tz or settings.TIMEZONE # Apply the timezone we settled on to end_date if it wasn't supplied if "end_date" in self.default_args and self.default_args["end_date"]: @@ -607,9 +607,9 @@ def __init__( f"Invalid values of dag.orientation: only support " f"{ORIENTATION_PRESETS}, but get {orientation}" ) - self.catchup = catchup + self.catchup: bool = catchup - self.partial = False + self.partial: bool = False self.on_success_callback = on_success_callback self.on_failure_callback = on_failure_callback @@ -627,7 +627,7 @@ def __init__( self.is_paused_upon_creation = is_paused_upon_creation self.auto_register = auto_register - self.fail_stop = fail_stop + self.fail_stop: bool = fail_stop self.jinja_environment_kwargs = jinja_environment_kwargs self.render_template_as_native_obj = render_template_as_native_obj diff --git a/airflow/settings.py b/airflow/settings.py index bdf70ecf27c8..8e18cfcec72b 100644 --- a/airflow/settings.py +++ b/airflow/settings.py @@ -565,12 +565,12 @@ def initialize(): # By default Airflow plugins are lazily-loaded (only loaded when required). Set it to False, # if you want to load plugins whenever 'airflow' is invoked via cli or loaded from module. -LAZY_LOAD_PLUGINS = conf.getboolean("core", "lazy_load_plugins", fallback=True) +LAZY_LOAD_PLUGINS: bool = conf.getboolean("core", "lazy_load_plugins", fallback=True) # By default Airflow providers are lazily-discovered (discovery and imports happen only when required). # Set it to False, if you want to discover providers whenever 'airflow' is invoked via cli or # loaded from module. -LAZY_LOAD_PROVIDERS = conf.getboolean("core", "lazy_discover_providers", fallback=True) +LAZY_LOAD_PROVIDERS: bool = conf.getboolean("core", "lazy_discover_providers", fallback=True) # Determines if the executor utilizes Kubernetes IS_K8S_OR_K8SCELERY_EXECUTOR = conf.get("core", "EXECUTOR") in { @@ -610,4 +610,10 @@ def initialize(): # AIP-44: internal_api (experimental) # This feature is not complete yet, so we disable it by default. -_ENABLE_AIP_44 = os.environ.get("AIRFLOW_ENABLE_AIP_44", "false").lower() in {"true", "t", "yes", "y", "1"} +_ENABLE_AIP_44: bool = os.environ.get("AIRFLOW_ENABLE_AIP_44", "false").lower() in { + "true", + "t", + "yes", + "y", + "1", +} From 448cf69700a39b208fd1f21419ba473b01469b49 Mon Sep 17 00:00:00 2001 From: "D. 
Ferruzzi" Date: Sat, 12 Aug 2023 02:32:43 -0700 Subject: [PATCH 039/117] D401 Support - airflow/callbacks thru airflow/decorators (#33335) (cherry picked from commit 396fd3cae91df2d16eeb9d399784687e87e47035) --- airflow/callbacks/base_callback_sink.py | 2 +- airflow/callbacks/database_callback_sink.py | 2 +- airflow/callbacks/pipe_callback_sink.py | 2 +- airflow/cli/cli_config.py | 4 +-- airflow/cli/cli_parser.py | 2 +- airflow/cli/commands/celery_command.py | 8 +++--- airflow/cli/commands/connection_command.py | 12 ++++----- airflow/cli/commands/dag_command.py | 26 +++++++++---------- airflow/cli/commands/dag_processor_command.py | 4 +-- airflow/cli/commands/db_command.py | 12 ++++----- airflow/cli/commands/info_command.py | 6 ++--- airflow/cli/commands/internal_api_command.py | 2 +- airflow/cli/commands/jobs_command.py | 2 +- airflow/cli/commands/kubernetes_command.py | 8 ++++-- airflow/cli/commands/legacy_commands.py | 2 +- airflow/cli/commands/pool_command.py | 16 ++++++------ airflow/cli/commands/provider_command.py | 20 +++++++------- airflow/cli/commands/role_command.py | 12 ++++----- airflow/cli/commands/scheduler_command.py | 6 ++--- airflow/cli/commands/standalone_command.py | 17 ++++++------ airflow/cli/commands/sync_perm_command.py | 2 +- airflow/cli/commands/task_command.py | 18 ++++++------- airflow/cli/commands/triggerer_command.py | 4 +-- airflow/cli/commands/user_command.py | 12 ++++----- airflow/cli/commands/variable_command.py | 14 +++++----- airflow/cli/commands/version_command.py | 2 +- airflow/cli/commands/webserver_command.py | 10 +++---- airflow/cli/simple_table.py | 10 +++---- airflow/dag_processing/manager.py | 26 +++++++++---------- airflow/dag_processing/processor.py | 4 +-- airflow/decorators/base.py | 2 +- airflow/decorators/branch_python.py | 2 +- airflow/decorators/external_python.py | 3 ++- airflow/decorators/python.py | 3 ++- airflow/decorators/python_virtualenv.py | 3 ++- airflow/decorators/sensor.py | 2 +- airflow/decorators/short_circuit.py | 3 ++- 37 files changed, 146 insertions(+), 139 deletions(-) diff --git a/airflow/callbacks/base_callback_sink.py b/airflow/callbacks/base_callback_sink.py index c243f0fbd640..2f02ce054cf9 100644 --- a/airflow/callbacks/base_callback_sink.py +++ b/airflow/callbacks/base_callback_sink.py @@ -24,5 +24,5 @@ class BaseCallbackSink: """Base class for Callbacks Sinks.""" def send(self, callback: CallbackRequest) -> None: - """Sends callback for execution.""" + """Send callback for execution.""" raise NotImplementedError() diff --git a/airflow/callbacks/database_callback_sink.py b/airflow/callbacks/database_callback_sink.py index 24306170dfea..0be8127f6f55 100644 --- a/airflow/callbacks/database_callback_sink.py +++ b/airflow/callbacks/database_callback_sink.py @@ -30,6 +30,6 @@ class DatabaseCallbackSink(BaseCallbackSink): @provide_session def send(self, callback: CallbackRequest, session: Session = NEW_SESSION) -> None: - """Sends callback for execution.""" + """Send callback for execution.""" db_callback = DbCallbackRequest(callback=callback, priority_weight=10) session.add(db_callback) diff --git a/airflow/callbacks/pipe_callback_sink.py b/airflow/callbacks/pipe_callback_sink.py index d702a781fa57..0c9433b057ca 100644 --- a/airflow/callbacks/pipe_callback_sink.py +++ b/airflow/callbacks/pipe_callback_sink.py @@ -36,7 +36,7 @@ def __init__(self, get_sink_pipe: Callable[[], MultiprocessingConnection]): def send(self, callback: CallbackRequest): """ - Sends information about the callback to be executed by Pipe. 
+ Send information about the callback to be executed by Pipe. :param callback: Callback request to be executed. """ diff --git a/airflow/cli/cli_config.py b/airflow/cli/cli_config.py index 1314ffa0e3e0..962a905fd604 100644 --- a/airflow/cli/cli_config.py +++ b/airflow/cli/cli_config.py @@ -131,12 +131,12 @@ def _check(value): def string_list_type(val): - """Parses comma-separated list and returns list of string (strips whitespace).""" + """Parse comma-separated list and returns list of string (strips whitespace).""" return [x.strip() for x in val.split(",")] def string_lower_type(val): - """Lowers arg.""" + """Lower arg.""" if not val: return return val.strip().lower() diff --git a/airflow/cli/cli_parser.py b/airflow/cli/cli_parser.py index 3e1e2bd5c132..8e4d819098c5 100644 --- a/airflow/cli/cli_parser.py +++ b/airflow/cli/cli_parser.py @@ -110,7 +110,7 @@ def add_argument(self, action: Action) -> None: @lru_cache(maxsize=None) def get_parser(dag_parser: bool = False) -> argparse.ArgumentParser: - """Creates and returns command line argument parser.""" + """Create and returns command line argument parser.""" parser = DefaultHelpParser(prog="airflow", formatter_class=AirflowHelpFormatter) subparsers = parser.add_subparsers(dest="subcommand", metavar="GROUP_OR_COMMAND") subparsers.required = True diff --git a/airflow/cli/commands/celery_command.py b/airflow/cli/commands/celery_command.py index af7dcbc6077a..eb53d6f60db6 100644 --- a/airflow/cli/commands/celery_command.py +++ b/airflow/cli/commands/celery_command.py @@ -45,7 +45,7 @@ @cli_utils.action_cli @providers_configuration_loaded def flower(args): - """Starts Flower, Celery monitoring tool.""" + """Start Flower, Celery monitoring tool.""" # This needs to be imported locally to not trigger Providers Manager initialization from airflow.providers.celery.executors.celery_executor import app as celery_app @@ -94,7 +94,7 @@ def flower(args): @contextmanager def _serve_logs(skip_serve_logs: bool = False): - """Starts serve_logs sub-process.""" + """Start serve_logs sub-process.""" sub_proc = None if skip_serve_logs is False: sub_proc = Process(target=serve_logs) @@ -137,7 +137,7 @@ def filter(self, record): @cli_utils.action_cli @providers_configuration_loaded def worker(args): - """Starts Airflow Celery worker.""" + """Start Airflow Celery worker.""" # This needs to be imported locally to not trigger Providers Manager initialization from airflow.providers.celery.executors.celery_executor import app as celery_app @@ -245,7 +245,7 @@ def worker(args): @cli_utils.action_cli @providers_configuration_loaded def stop_worker(args): - """Sends SIGTERM to Celery worker.""" + """Send SIGTERM to Celery worker.""" # Read PID from file if args.pid: pid_file_path = args.pid diff --git a/airflow/cli/commands/connection_command.py b/airflow/cli/commands/connection_command.py index 02251a70ec02..bf7ebc6ba5f2 100644 --- a/airflow/cli/commands/connection_command.py +++ b/airflow/cli/commands/connection_command.py @@ -80,7 +80,7 @@ def connections_get(args): @suppress_logs_and_warning @providers_configuration_loaded def connections_list(args): - """Lists all connections at the command line.""" + """List all connections at the command line.""" with create_session() as session: query = select(Connection) if args.conn_id: @@ -149,7 +149,7 @@ def _valid_uri(uri: str) -> bool: @cache def _get_connection_types() -> list[str]: - """Returns connection types available.""" + """Return connection types available.""" _connection_types = ["fs", "mesos_framework-id", 
"email", "generic"] providers_manager = ProvidersManager() for connection_type, provider_info in providers_manager.hooks.items(): @@ -160,7 +160,7 @@ def _get_connection_types() -> list[str]: @providers_configuration_loaded def connections_export(args): - """Exports all connections to a file.""" + """Export all connections to a file.""" file_formats = [".yaml", ".json", ".env"] if args.format: warnings.warn("Option `--format` is deprecated. Use `--file-format` instead.", DeprecationWarning) @@ -211,7 +211,7 @@ def connections_export(args): @cli_utils.action_cli @providers_configuration_loaded def connections_add(args): - """Adds new connection.""" + """Add new connection.""" has_uri = bool(args.conn_uri) has_json = bool(args.conn_json) has_type = bool(args.conn_type) @@ -303,7 +303,7 @@ def connections_add(args): @cli_utils.action_cli @providers_configuration_loaded def connections_delete(args): - """Deletes connection from DB.""" + """Delete connection from DB.""" with create_session() as session: try: to_delete = session.scalars(select(Connection).where(Connection.conn_id == args.conn_id)).one() @@ -319,7 +319,7 @@ def connections_delete(args): @cli_utils.action_cli(check_db=False) @providers_configuration_loaded def connections_import(args): - """Imports connections from a file.""" + """Import connections from a file.""" if os.path.exists(args.file): _import_helper(args.file, args.overwrite) else: diff --git a/airflow/cli/commands/dag_command.py b/airflow/cli/commands/dag_command.py index f965e24b5ef7..ea06619917f8 100644 --- a/airflow/cli/commands/dag_command.py +++ b/airflow/cli/commands/dag_command.py @@ -123,7 +123,7 @@ def _run_dag_backfill(dags: list[DAG], args) -> None: @cli_utils.action_cli @providers_configuration_loaded def dag_backfill(args, dag: list[DAG] | DAG | None = None) -> None: - """Creates backfill job or dry run for a DAG or list of DAGs using regex.""" + """Create backfill job or dry run for a DAG or list of DAGs using regex.""" logging.basicConfig(level=settings.LOGGING_LEVEL, format=settings.SIMPLE_LOG_FORMAT) signal.signal(signal.SIGTERM, sigint_handler) warnings.warn( @@ -154,7 +154,7 @@ def dag_backfill(args, dag: list[DAG] | DAG | None = None) -> None: @cli_utils.action_cli @providers_configuration_loaded def dag_trigger(args) -> None: - """Creates a dag run for the specified dag.""" + """Create a dag run for the specified dag.""" api_client = get_current_api_client() try: message = api_client.trigger_dag( @@ -175,7 +175,7 @@ def dag_trigger(args) -> None: @cli_utils.action_cli @providers_configuration_loaded def dag_delete(args) -> None: - """Deletes all DB records related to the specified dag.""" + """Delete all DB records related to the specified dag.""" api_client = get_current_api_client() if ( args.yes @@ -207,7 +207,7 @@ def dag_unpause(args) -> None: @providers_configuration_loaded def set_is_paused(is_paused: bool, args) -> None: - """Sets is_paused for DAG by a given dag_id.""" + """Set is_paused for DAG by a given dag_id.""" dag = DagModel.get_dagmodel(args.dag_id) if not dag: @@ -220,7 +220,7 @@ def set_is_paused(is_paused: bool, args) -> None: @providers_configuration_loaded def dag_dependencies_show(args) -> None: - """Displays DAG dependencies, save to file or show as imgcat image.""" + """Display DAG dependencies, save to file or show as imgcat image.""" dot = render_dag_dependencies(SerializedDagModel.get_dag_dependencies()) filename = args.save imgcat = args.imgcat @@ -240,7 +240,7 @@ def dag_dependencies_show(args) -> None: 
@providers_configuration_loaded def dag_show(args) -> None: - """Displays DAG or saves it's graphic representation to the file.""" + """Display DAG or saves it's graphic representation to the file.""" dag = get_dag(args.subdir, args.dag_id) dot = render_dag(dag) filename = args.save @@ -286,7 +286,7 @@ def _save_dot_to_file(dot: Dot, filename: str) -> None: @provide_session def dag_state(args, session: Session = NEW_SESSION) -> None: """ - Returns the state (and conf if exists) of a DagRun at the command line. + Return the state (and conf if exists) of a DagRun at the command line. >>> airflow dags state tutorial 2015-01-01T00:00:00.000000 running @@ -309,7 +309,7 @@ def dag_state(args, session: Session = NEW_SESSION) -> None: @providers_configuration_loaded def dag_next_execution(args) -> None: """ - Returns the next execution datetime of a DAG at the command line. + Return the next execution datetime of a DAG at the command line. >>> airflow dags next-execution tutorial 2018-08-31 10:38:00 @@ -348,7 +348,7 @@ def print_execution_interval(interval: DataInterval | None): @suppress_logs_and_warning @providers_configuration_loaded def dag_list_dags(args) -> None: - """Displays dags with or without stats at the command line.""" + """Display dags with or without stats at the command line.""" dagbag = DagBag(process_subdir(args.subdir)) if dagbag.import_errors: from rich import print as rich_print @@ -396,7 +396,7 @@ def dag_details(args, session=NEW_SESSION): @suppress_logs_and_warning @providers_configuration_loaded def dag_list_import_errors(args) -> None: - """Displays dags with import errors on the command line.""" + """Display dags with import errors on the command line.""" dagbag = DagBag(process_subdir(args.subdir)) data = [] for filename, errors in dagbag.import_errors.items(): @@ -411,7 +411,7 @@ def dag_list_import_errors(args) -> None: @suppress_logs_and_warning @providers_configuration_loaded def dag_report(args) -> None: - """Displays dagbag stats at the command line.""" + """Display dagbag stats at the command line.""" dagbag = DagBag(process_subdir(args.subdir)) AirflowConsole().print_as( data=dagbag.dagbag_stats, @@ -431,7 +431,7 @@ def dag_report(args) -> None: @providers_configuration_loaded @provide_session def dag_list_jobs(args, dag: DAG | None = None, session: Session = NEW_SESSION) -> None: - """Lists latest n jobs.""" + """List latest n jobs.""" queries = [] if dag: args.dag_id = dag.dag_id @@ -462,7 +462,7 @@ def dag_list_jobs(args, dag: DAG | None = None, session: Session = NEW_SESSION) @providers_configuration_loaded @provide_session def dag_list_dag_runs(args, dag: DAG | None = None, session: Session = NEW_SESSION) -> None: - """Lists dag runs for a given DAG.""" + """List dag runs for a given DAG.""" if dag: args.dag_id = dag.dag_id else: diff --git a/airflow/cli/commands/dag_processor_command.py b/airflow/cli/commands/dag_processor_command.py index 757bd778cca5..cf880f6622e9 100644 --- a/airflow/cli/commands/dag_processor_command.py +++ b/airflow/cli/commands/dag_processor_command.py @@ -37,7 +37,7 @@ def _create_dag_processor_job_runner(args: Any) -> DagProcessorJobRunner: - """Creates DagFileProcessorProcess instance.""" + """Create DagFileProcessorProcess instance.""" processor_timeout_seconds: int = conf.getint("core", "dag_file_processor_timeout") processor_timeout = timedelta(seconds=processor_timeout_seconds) @@ -56,7 +56,7 @@ def _create_dag_processor_job_runner(args: Any) -> DagProcessorJobRunner: @cli_utils.action_cli @providers_configuration_loaded def 
dag_processor(args): - """Starts Airflow Dag Processor Job.""" + """Start Airflow Dag Processor Job.""" if not conf.getboolean("scheduler", "standalone_dag_processor"): raise SystemExit("The option [scheduler/standalone_dag_processor] must be True.") diff --git a/airflow/cli/commands/db_command.py b/airflow/cli/commands/db_command.py index 2fb21dfd2295..f0d3ac4edc94 100644 --- a/airflow/cli/commands/db_command.py +++ b/airflow/cli/commands/db_command.py @@ -39,7 +39,7 @@ @providers_configuration_loaded def initdb(args): - """Initializes the metadata database.""" + """Initialize the metadata database.""" warnings.warn( "`db init` is deprecated. Use `db migrate` instead to migrate the db and/or " "airflow connections create-default-connections to create the default connections", @@ -52,7 +52,7 @@ def initdb(args): @providers_configuration_loaded def resetdb(args): - """Resets the metadata database.""" + """Reset the metadata database.""" print("DB: " + repr(settings.engine.url)) if not (args.yes or input("This will drop existing tables if they exist. Proceed? (y/n)").upper() == "Y"): raise SystemExit("Cancelled") @@ -161,7 +161,7 @@ def downgrade(args): @providers_configuration_loaded def check_migrations(args): - """Function to wait for all airflow migrations to complete. Used for launching airflow in k8s.""" + """Wait for all airflow migrations to complete. Used for launching airflow in k8s.""" db.check_migrations(timeout=args.migration_wait_timeout) @@ -212,7 +212,7 @@ def shell(args): @cli_utils.action_cli(check_db=False) @providers_configuration_loaded def check(args): - """Runs a check command that checks if db is available.""" + """Run a check command that checks if db is available.""" retries: int = args.retry retry_delay: int = args.retry_delay @@ -251,7 +251,7 @@ def cleanup_tables(args): @cli_utils.action_cli(check_db=False) @providers_configuration_loaded def export_archived(args): - """Exports archived records from metadata database.""" + """Export archived records from metadata database.""" export_archived_records( export_format=args.export_format, output_path=args.output_path, @@ -264,7 +264,7 @@ def export_archived(args): @cli_utils.action_cli(check_db=False) @providers_configuration_loaded def drop_archived(args): - """Drops archived tables from metadata database.""" + """Drop archived tables from metadata database.""" drop_archived_tables( table_names=args.tables, needs_confirm=not args.yes, diff --git a/airflow/cli/commands/info_command.py b/airflow/cli/commands/info_command.py index 2e60d80b271d..3a4ba396e4cd 100644 --- a/airflow/cli/commands/info_command.py +++ b/airflow/cli/commands/info_command.py @@ -209,7 +209,7 @@ def _get_version(cmd: list[str], grep: bytes | None = None): @staticmethod def _task_logging_handler(): - """Returns task logging handler.""" + """Return task logging handler.""" def get_fullname(o): module = o.__class__.__module__ @@ -314,7 +314,7 @@ def _providers_info(self): return [(p.data["package-name"], p.version) for p in ProvidersManager().providers.values()] def show(self, output: str, console: AirflowConsole | None = None) -> None: - """Shows information about Airflow instance.""" + """Show information about Airflow instance.""" all_info = { "Apache Airflow": self._airflow_info, "System info": self._system_info, @@ -336,7 +336,7 @@ def show(self, output: str, console: AirflowConsole | None = None) -> None: ) def render_text(self, output: str) -> str: - """Exports the info to string.""" + """Export the info to string.""" console = 
AirflowConsole(record=True) with console.capture(): self.show(output=output, console=console) diff --git a/airflow/cli/commands/internal_api_command.py b/airflow/cli/commands/internal_api_command.py index 72fe57c206a0..b9b77c093446 100644 --- a/airflow/cli/commands/internal_api_command.py +++ b/airflow/cli/commands/internal_api_command.py @@ -61,7 +61,7 @@ @cli_utils.action_cli @providers_configuration_loaded def internal_api(args): - """Starts Airflow Internal API.""" + """Start Airflow Internal API.""" print(settings.HEADER) access_logfile = args.access_logfile or "-" diff --git a/airflow/cli/commands/jobs_command.py b/airflow/cli/commands/jobs_command.py index b6509ea6424e..3f22241db9cb 100644 --- a/airflow/cli/commands/jobs_command.py +++ b/airflow/cli/commands/jobs_command.py @@ -29,7 +29,7 @@ @providers_configuration_loaded @provide_session def check(args, session: Session = NEW_SESSION) -> None: - """Checks if job(s) are still alive.""" + """Check if job(s) are still alive.""" if args.allow_multiple and not args.limit > 1: raise SystemExit("To use option --allow-multiple, you must set the limit to a value greater than 1.") if args.hostname and args.local: diff --git a/airflow/cli/commands/kubernetes_command.py b/airflow/cli/commands/kubernetes_command.py index 056465577fcd..038f53e3f370 100644 --- a/airflow/cli/commands/kubernetes_command.py +++ b/airflow/cli/commands/kubernetes_command.py @@ -39,7 +39,7 @@ @cli_utils.action_cli @providers_configuration_loaded def generate_pod_yaml(args): - """Generates yaml files for each task in the DAG. Used for testing output of KubernetesExecutor.""" + """Generate yaml files for each task in the DAG. Used for testing output of KubernetesExecutor.""" execution_date = args.execution_date dag = get_dag(subdir=args.subdir, dag_id=args.dag_id) yaml_output_path = args.output_path @@ -151,7 +151,11 @@ def cleanup_pods(args): def _delete_pod(name, namespace): - """Helper Function for cleanup_pods.""" + """ + Delete a namespaced pod. + + Helper Function for cleanup_pods. 
+ """ kube_client = get_kube_client() delete_options = client.V1DeleteOptions() print(f'Deleting POD "{name}" from "{namespace}" namespace') diff --git a/airflow/cli/commands/legacy_commands.py b/airflow/cli/commands/legacy_commands.py index 910c6e442703..4338cf1c339d 100644 --- a/airflow/cli/commands/legacy_commands.py +++ b/airflow/cli/commands/legacy_commands.py @@ -50,7 +50,7 @@ def check_legacy_command(action, value): - """Checks command value and raise error if value is in removed command.""" + """Check command value and raise error if value is in removed command.""" new_command = COMMAND_MAP.get(value) if new_command is not None: msg = f"`airflow {value}` command, has been removed, please use `airflow {new_command}`" diff --git a/airflow/cli/commands/pool_command.py b/airflow/cli/commands/pool_command.py index 7e7c63640cb6..b26f2032a85b 100644 --- a/airflow/cli/commands/pool_command.py +++ b/airflow/cli/commands/pool_command.py @@ -46,7 +46,7 @@ def _show_pools(pools, output): @suppress_logs_and_warning @providers_configuration_loaded def pool_list(args): - """Displays info of all the pools.""" + """Display info of all the pools.""" api_client = get_current_api_client() pools = api_client.get_pools() _show_pools(pools=pools, output=args.output) @@ -55,7 +55,7 @@ def pool_list(args): @suppress_logs_and_warning @providers_configuration_loaded def pool_get(args): - """Displays pool info by a given name.""" + """Display pool info by a given name.""" api_client = get_current_api_client() try: pools = [api_client.get_pool(name=args.pool)] @@ -68,7 +68,7 @@ def pool_get(args): @suppress_logs_and_warning @providers_configuration_loaded def pool_set(args): - """Creates new pool with a given name and slots.""" + """Create new pool with a given name and slots.""" api_client = get_current_api_client() api_client.create_pool( name=args.pool, slots=args.slots, description=args.description, include_deferred=args.include_deferred @@ -80,7 +80,7 @@ def pool_set(args): @suppress_logs_and_warning @providers_configuration_loaded def pool_delete(args): - """Deletes pool by a given name.""" + """Delete pool by a given name.""" api_client = get_current_api_client() try: api_client.delete_pool(name=args.pool) @@ -93,7 +93,7 @@ def pool_delete(args): @suppress_logs_and_warning @providers_configuration_loaded def pool_import(args): - """Imports pools from the file.""" + """Import pools from the file.""" if not os.path.exists(args.file): raise SystemExit(f"Missing pools file {args.file}") pools, failed = pool_import_helper(args.file) @@ -104,13 +104,13 @@ def pool_import(args): @providers_configuration_loaded def pool_export(args): - """Exports all the pools to the file.""" + """Export all the pools to the file.""" pools = pool_export_helper(args.file) print(f"Exported {len(pools)} pools to {args.file}") def pool_import_helper(filepath): - """Helps import pools from the json file.""" + """Help import pools from the json file.""" api_client = get_current_api_client() with open(filepath) as poolfile: @@ -137,7 +137,7 @@ def pool_import_helper(filepath): def pool_export_helper(filepath): - """Helps export all the pools to the json file.""" + """Help export all the pools to the json file.""" api_client = get_current_api_client() pool_dict = {} pools = api_client.get_pools() diff --git a/airflow/cli/commands/provider_command.py b/airflow/cli/commands/provider_command.py index 76e81a105452..82f25a2a495b 100644 --- a/airflow/cli/commands/provider_command.py +++ b/airflow/cli/commands/provider_command.py @@ -58,7 
+58,7 @@ def provider_get(args): @suppress_logs_and_warning @providers_configuration_loaded def providers_list(args): - """Lists all providers at the command line.""" + """List all providers at the command line.""" AirflowConsole().print_as( data=list(ProvidersManager().providers.values()), output=args.output, @@ -73,7 +73,7 @@ def providers_list(args): @suppress_logs_and_warning @providers_configuration_loaded def hooks_list(args): - """Lists all hooks at the command line.""" + """List all hooks at the command line.""" AirflowConsole().print_as( data=list(ProvidersManager().hooks.items()), output=args.output, @@ -104,7 +104,7 @@ def triggers_list(args): @suppress_logs_and_warning @providers_configuration_loaded def connection_form_widget_list(args): - """Lists all custom connection form fields at the command line.""" + """List all custom connection form fields at the command line.""" AirflowConsole().print_as( data=sorted(ProvidersManager().connection_form_widgets.items()), output=args.output, @@ -120,7 +120,7 @@ def connection_form_widget_list(args): @suppress_logs_and_warning @providers_configuration_loaded def connection_field_behaviours(args): - """Lists field behaviours.""" + """List field behaviours.""" AirflowConsole().print_as( data=list(ProvidersManager().field_behaviours), output=args.output, @@ -133,7 +133,7 @@ def connection_field_behaviours(args): @suppress_logs_and_warning @providers_configuration_loaded def extra_links_list(args): - """Lists all extra links at the command line.""" + """List all extra links at the command line.""" AirflowConsole().print_as( data=ProvidersManager().extra_links_class_names, output=args.output, @@ -146,7 +146,7 @@ def extra_links_list(args): @suppress_logs_and_warning @providers_configuration_loaded def logging_list(args): - """Lists all log task handlers at the command line.""" + """List all log task handlers at the command line.""" AirflowConsole().print_as( data=list(ProvidersManager().logging_class_names), output=args.output, @@ -159,7 +159,7 @@ def logging_list(args): @suppress_logs_and_warning @providers_configuration_loaded def secrets_backends_list(args): - """Lists all secrets backends at the command line.""" + """List all secrets backends at the command line.""" AirflowConsole().print_as( data=list(ProvidersManager().secrets_backend_class_names), output=args.output, @@ -172,7 +172,7 @@ def secrets_backends_list(args): @suppress_logs_and_warning @providers_configuration_loaded def auth_backend_list(args): - """Lists all API auth backend modules at the command line.""" + """List all API auth backend modules at the command line.""" AirflowConsole().print_as( data=list(ProvidersManager().auth_backend_module_names), output=args.output, @@ -185,7 +185,7 @@ def auth_backend_list(args): @suppress_logs_and_warning @providers_configuration_loaded def executors_list(args): - """Lists all executors at the command line.""" + """List all executors at the command line.""" AirflowConsole().print_as( data=list(ProvidersManager().executor_class_names), output=args.output, @@ -198,7 +198,7 @@ def executors_list(args): @suppress_logs_and_warning @providers_configuration_loaded def config_list(args): - """Lists all configurations at the command line.""" + """List all configurations at the command line.""" AirflowConsole().print_as( data=list(ProvidersManager().provider_configs), output=args.output, diff --git a/airflow/cli/commands/role_command.py b/airflow/cli/commands/role_command.py index 180e2dd2a7cb..a582b3319532 100644 --- 
a/airflow/cli/commands/role_command.py +++ b/airflow/cli/commands/role_command.py @@ -34,7 +34,7 @@ @suppress_logs_and_warning @providers_configuration_loaded def roles_list(args): - """Lists all existing roles.""" + """List all existing roles.""" from airflow.utils.cli_app_builder import get_application_builder with get_application_builder() as appbuilder: @@ -62,7 +62,7 @@ def roles_list(args): @suppress_logs_and_warning @providers_configuration_loaded def roles_create(args): - """Creates new empty role in DB.""" + """Create new empty role in DB.""" from airflow.utils.cli_app_builder import get_application_builder with get_application_builder() as appbuilder: @@ -75,7 +75,7 @@ def roles_create(args): @suppress_logs_and_warning @providers_configuration_loaded def roles_delete(args): - """Deletes role in DB.""" + """Delete role in DB.""" from airflow.utils.cli_app_builder import get_application_builder with get_application_builder() as appbuilder: @@ -144,7 +144,7 @@ def __roles_add_or_remove_permissions(args): @suppress_logs_and_warning @providers_configuration_loaded def roles_add_perms(args): - """Adds permissions to role in DB.""" + """Add permissions to role in DB.""" __roles_add_or_remove_permissions(args) @@ -152,7 +152,7 @@ def roles_add_perms(args): @suppress_logs_and_warning @providers_configuration_loaded def roles_del_perms(args): - """Deletes permissions from role in DB.""" + """Delete permissions from role in DB.""" __roles_add_or_remove_permissions(args) @@ -160,7 +160,7 @@ def roles_del_perms(args): @providers_configuration_loaded def roles_export(args): """ - Exports all the roles from the database to a file. + Export all the roles from the database to a file. Note, this function does not export the permissions associated for each role. Strictly, it exports the role names into the passed role json file. 
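Note: as the roles_export docstring above states, only role names are written to the JSON file and no permission data is attached. A minimal sketch of that output shape; the helper and the file name are hypothetical and are not the code behind the airflow roles export command:

import json


def export_role_names(role_names: list[str], path: str) -> None:
    # Write only the role names, mirroring the "names only" behaviour described in the docstring.
    with open(path, "w") as outfile:
        json.dump(role_names, outfile, indent=4)


export_role_names(["Admin", "Op", "Viewer"], "roles.json")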
diff --git a/airflow/cli/commands/scheduler_command.py b/airflow/cli/commands/scheduler_command.py index ad0063ffa3d0..fd25951ad322 100644 --- a/airflow/cli/commands/scheduler_command.py +++ b/airflow/cli/commands/scheduler_command.py @@ -52,7 +52,7 @@ def _run_scheduler_job(job_runner: SchedulerJobRunner, *, skip_serve_logs: bool) @cli_utils.action_cli @providers_configuration_loaded def scheduler(args): - """Starts Airflow Scheduler.""" + """Start Airflow Scheduler.""" print(settings.HEADER) job_runner = SchedulerJobRunner( @@ -87,7 +87,7 @@ def scheduler(args): @contextmanager def _serve_logs(skip_serve_logs: bool = False): - """Starts serve_logs sub-process.""" + """Start serve_logs sub-process.""" from airflow.utils.serve_logs import serve_logs sub_proc = None @@ -103,7 +103,7 @@ def _serve_logs(skip_serve_logs: bool = False): @contextmanager def _serve_health_check(enable_health_check: bool = False): - """Starts serve_health_check sub-process.""" + """Start serve_health_check sub-process.""" sub_proc = None if enable_health_check: sub_proc = Process(target=serve_health_check) diff --git a/airflow/cli/commands/standalone_command.py b/airflow/cli/commands/standalone_command.py index 68abfdd0002b..ceae1c6dcaec 100644 --- a/airflow/cli/commands/standalone_command.py +++ b/airflow/cli/commands/standalone_command.py @@ -59,7 +59,6 @@ def __init__(self): @providers_configuration_loaded def run(self): - """Main run loop.""" self.print_output("standalone", "Starting Airflow Standalone") # Silence built-in logging at INFO logging.getLogger("").setLevel(logging.WARNING) @@ -130,7 +129,7 @@ def update_output(self): def print_output(self, name: str, output): """ - Prints an output line with name and colouring. + Print an output line with name and colouring. You can pass multiple lines to output if you wish; it will be split for you. """ @@ -146,7 +145,7 @@ def print_output(self, name: str, output): def print_error(self, name: str, output): """ - Prints an error message to the console. + Print an error message to the console. This is the same as print_output but with the text red """ @@ -172,7 +171,7 @@ def calculate_env(self): return env def initialize_database(self): - """Makes sure all the tables are created.""" + """Make sure all the tables are created.""" # Set up DB tables self.print_output("standalone", "Checking database is initialized") db.initdb() @@ -214,7 +213,7 @@ def initialize_database(self): def is_ready(self): """ - Detects when all Airflow components are ready to serve. + Detect when all Airflow components are ready to serve. For now, it's simply time-based. """ @@ -226,7 +225,7 @@ def is_ready(self): def port_open(self, port): """ - Checks if the given port is listening on the local machine. + Check if the given port is listening on the local machine. Used to tell if webserver is alive. """ @@ -242,7 +241,7 @@ def port_open(self, port): def job_running(self, job_runner_class: type[BaseJobRunner]): """ - Checks if the given job name is running and heartbeating correctly. + Check if the given job name is running and heartbeating correctly. Used to tell if scheduler is alive. """ @@ -253,7 +252,7 @@ def job_running(self, job_runner_class: type[BaseJobRunner]): def print_ready(self): """ - Prints the banner shown when Airflow is ready to go. + Print the banner shown when Airflow is ready to go. Include with login details. 
""" @@ -288,7 +287,7 @@ def __init__(self, parent, name: str, command: list[str], env: dict[str, str]): self.env = env def run(self): - """Runs the actual process and captures it output to a queue.""" + """Run the actual process and captures it output to a queue.""" self.process = subprocess.Popen( ["airflow"] + self.command, stdout=subprocess.PIPE, diff --git a/airflow/cli/commands/sync_perm_command.py b/airflow/cli/commands/sync_perm_command.py index ab458b2d93b4..4d4e280637f9 100644 --- a/airflow/cli/commands/sync_perm_command.py +++ b/airflow/cli/commands/sync_perm_command.py @@ -25,7 +25,7 @@ @cli_utils.action_cli @providers_configuration_loaded def sync_perm(args): - """Updates permissions for existing roles and DAGs.""" + """Update permissions for existing roles and DAGs.""" from airflow.utils.cli_app_builder import get_application_builder with get_application_builder() as appbuilder: diff --git a/airflow/cli/commands/task_command.py b/airflow/cli/commands/task_command.py index da47e8dd8b37..205d87b2f034 100644 --- a/airflow/cli/commands/task_command.py +++ b/airflow/cli/commands/task_command.py @@ -196,7 +196,7 @@ def _get_ti( def _run_task_by_selected_method(args, dag: DAG, ti: TaskInstance) -> None | TaskReturnCode: """ - Runs the task based on a mode. + Run the task based on a mode. Any of the 3 modes are available: @@ -214,7 +214,7 @@ def _run_task_by_selected_method(args, dag: DAG, ti: TaskInstance) -> None | Tas def _run_task_by_executor(args, dag: DAG, ti: TaskInstance) -> None: """ - Sends the task to the executor for execution. + Send the task to the executor for execution. This can result in the task being started by another host if the executor implementation does. """ @@ -285,7 +285,7 @@ def _run_task_by_local_task_job(args, ti: TaskInstance) -> TaskReturnCode | None def _run_raw_task(args, ti: TaskInstance) -> None | TaskReturnCode: - """Runs the main task handling code.""" + """Run the main task handling code.""" return ti._run_raw_task( mark_success=args.mark_success, job_id=args.job_id, @@ -473,7 +473,7 @@ def task_failed_deps(args) -> None: @providers_configuration_loaded def task_state(args) -> None: """ - Returns the state of a TaskInstance at the command line. + Return the state of a TaskInstance at the command line. >>> airflow tasks state tutorial sleep 2015-01-01 success @@ -488,7 +488,7 @@ def task_state(args) -> None: @suppress_logs_and_warning @providers_configuration_loaded def task_list(args, dag: DAG | None = None) -> None: - """Lists the tasks within a DAG at the command line.""" + """List the tasks within a DAG at the command line.""" dag = dag or get_dag(args.subdir, args.dag_id) if args.tree: dag.tree_view() @@ -512,7 +512,7 @@ def post_mortem(self) -> None: def _guess_debugger() -> _SupportedDebugger: """ - Trying to guess the debugger used by the user. + Try to guess the debugger used by the user. When it doesn't find any user-installed debugger, returns ``pdb``. @@ -576,7 +576,7 @@ def format_task_instance(ti: TaskInstance) -> dict[str, str]: @cli_utils.action_cli(check_db=False) def task_test(args, dag: DAG | None = None) -> None: - """Tests task for a given dag_id.""" + """Test task for a given dag_id.""" # We want to log output from operators etc to show up here. Normally # airflow.task would redirect to a file, but here we want it to propagate # up to the normal airflow handler. 
@@ -638,7 +638,7 @@ def task_test(args, dag: DAG | None = None) -> None: @suppress_logs_and_warning @providers_configuration_loaded def task_render(args, dag: DAG | None = None) -> None: - """Renders and displays templated fields for a given task.""" + """Render and displays templated fields for a given task.""" if not dag: dag = get_dag(args.subdir, args.dag_id) task = dag.get_task(task_id=args.task_id) @@ -661,7 +661,7 @@ def task_render(args, dag: DAG | None = None) -> None: @cli_utils.action_cli(check_db=False) @providers_configuration_loaded def task_clear(args) -> None: - """Clears all task instances or only those matched by regex for a DAG(s).""" + """Clear all task instances or only those matched by regex for a DAG(s).""" logging.basicConfig(level=settings.LOGGING_LEVEL, format=settings.SIMPLE_LOG_FORMAT) if args.dag_id and not args.subdir and not args.dag_regex and not args.task_regex: diff --git a/airflow/cli/commands/triggerer_command.py b/airflow/cli/commands/triggerer_command.py index c7d0827bd8e5..2288f1537fbb 100644 --- a/airflow/cli/commands/triggerer_command.py +++ b/airflow/cli/commands/triggerer_command.py @@ -38,7 +38,7 @@ @contextmanager def _serve_logs(skip_serve_logs: bool = False) -> Generator[None, None, None]: - """Starts serve_logs sub-process.""" + """Start serve_logs sub-process.""" sub_proc = None if skip_serve_logs is False: port = conf.getint("logging", "trigger_log_server_port", fallback=8794) @@ -54,7 +54,7 @@ def _serve_logs(skip_serve_logs: bool = False) -> Generator[None, None, None]: @cli_utils.action_cli @providers_configuration_loaded def triggerer(args): - """Starts Airflow Triggerer.""" + """Start Airflow Triggerer.""" settings.MASK_SECRETS_IN_LOGS = True print(settings.HEADER) triggerer_heartrate = conf.getfloat("triggerer", "JOB_HEARTBEAT_SEC") diff --git a/airflow/cli/commands/user_command.py b/airflow/cli/commands/user_command.py index 1553d27a0156..bc982719c94d 100644 --- a/airflow/cli/commands/user_command.py +++ b/airflow/cli/commands/user_command.py @@ -49,7 +49,7 @@ class UserSchema(Schema): @suppress_logs_and_warning @providers_configuration_loaded def users_list(args): - """Lists users at the command line.""" + """List users at the command line.""" from airflow.utils.cli_app_builder import get_application_builder with get_application_builder() as appbuilder: @@ -64,7 +64,7 @@ def users_list(args): @cli_utils.action_cli(check_db=True) @providers_configuration_loaded def users_create(args): - """Creates new user in the DB.""" + """Create new user in the DB.""" from airflow.utils.cli_app_builder import get_application_builder with get_application_builder() as appbuilder: @@ -113,7 +113,7 @@ def _find_user(args): @cli_utils.action_cli @providers_configuration_loaded def users_delete(args): - """Deletes user from DB.""" + """Delete user from DB.""" user = _find_user(args) # Clear the associated user roles first. 
@@ -131,7 +131,7 @@ def users_delete(args): @cli_utils.action_cli @providers_configuration_loaded def users_manage_role(args, remove=False): - """Deletes or appends user roles.""" + """Delete or appends user roles.""" user = _find_user(args) from airflow.utils.cli_app_builder import get_application_builder @@ -160,7 +160,7 @@ def users_manage_role(args, remove=False): @providers_configuration_loaded def users_export(args): - """Exports all users to the json file.""" + """Export all users to the json file.""" from airflow.utils.cli_app_builder import get_application_builder with get_application_builder() as appbuilder: @@ -190,7 +190,7 @@ def remove_underscores(s): @cli_utils.action_cli @providers_configuration_loaded def users_import(args): - """Imports users from the json file.""" + """Import users from the json file.""" json_file = getattr(args, "import") if not os.path.exists(json_file): raise SystemExit(f"File '{json_file}' does not exist") diff --git a/airflow/cli/commands/variable_command.py b/airflow/cli/commands/variable_command.py index 34b46530d562..983ddfb3520b 100644 --- a/airflow/cli/commands/variable_command.py +++ b/airflow/cli/commands/variable_command.py @@ -35,7 +35,7 @@ @suppress_logs_and_warning @providers_configuration_loaded def variables_list(args): - """Displays all the variables.""" + """Display all the variables.""" with create_session() as session: variables = session.scalars(select(Variable)).all() AirflowConsole().print_as(data=variables, output=args.output, mapper=lambda x: {"key": x.key}) @@ -44,7 +44,7 @@ def variables_list(args): @suppress_logs_and_warning @providers_configuration_loaded def variables_get(args): - """Displays variable by a given name.""" + """Display variable by a given name.""" try: if args.default is None: var = Variable.get(args.key, deserialize_json=args.json) @@ -59,7 +59,7 @@ def variables_get(args): @cli_utils.action_cli @providers_configuration_loaded def variables_set(args): - """Creates new variable with a given name and value.""" + """Create new variable with a given name and value.""" Variable.set(args.key, args.value, serialize_json=args.json) print(f"Variable {args.key} created") @@ -67,7 +67,7 @@ def variables_set(args): @cli_utils.action_cli @providers_configuration_loaded def variables_delete(args): - """Deletes variable by a given name.""" + """Delete variable by a given name.""" Variable.delete(args.key) print(f"Variable {args.key} deleted") @@ -75,7 +75,7 @@ def variables_delete(args): @cli_utils.action_cli @providers_configuration_loaded def variables_import(args): - """Imports variables from a given file.""" + """Import variables from a given file.""" if os.path.exists(args.file): _import_helper(args.file) else: @@ -84,12 +84,12 @@ def variables_import(args): @providers_configuration_loaded def variables_export(args): - """Exports all the variables to the file.""" + """Export all the variables to the file.""" _variable_export_helper(args.file) def _import_helper(filepath): - """Helps import variables from the file.""" + """Help import variables from the file.""" with open(filepath) as varfile: data = varfile.read() diff --git a/airflow/cli/commands/version_command.py b/airflow/cli/commands/version_command.py index d3b735951c1f..81f56f306213 100644 --- a/airflow/cli/commands/version_command.py +++ b/airflow/cli/commands/version_command.py @@ -21,5 +21,5 @@ def version(args): - """Displays Airflow version at the command line.""" + """Display Airflow version at the command line.""" print(airflow.__version__) diff --git 
a/airflow/cli/commands/webserver_command.py b/airflow/cli/commands/webserver_command.py index 17d48ad8dae4..00ac66372465 100644 --- a/airflow/cli/commands/webserver_command.py +++ b/airflow/cli/commands/webserver_command.py @@ -132,7 +132,7 @@ def _get_file_hash(fname: str): return hash_md5.hexdigest() def _get_num_ready_workers_running(self) -> int: - """Returns number of ready Gunicorn workers by looking for READY_PREFIX in process name.""" + """Return number of ready Gunicorn workers by looking for READY_PREFIX in process name.""" workers = psutil.Process(self.gunicorn_master_proc.pid).children() def ready_prefix_on_cmdline(proc): @@ -148,12 +148,12 @@ def ready_prefix_on_cmdline(proc): return len(ready_workers) def _get_num_workers_running(self) -> int: - """Returns number of running Gunicorn workers processes.""" + """Return number of running Gunicorn workers processes.""" workers = psutil.Process(self.gunicorn_master_proc.pid).children() return len(workers) def _wait_until_true(self, fn, timeout: int = 0) -> None: - """Sleeps until fn is true.""" + """Sleep until fn is true.""" start_time = time.monotonic() while not fn(): if 0 < timeout <= time.monotonic() - start_time: @@ -207,7 +207,7 @@ def _reload_gunicorn(self) -> None: ) def start(self) -> NoReturn: - """Starts monitoring the webserver.""" + """Start monitoring the webserver.""" try: self._wait_until_true( lambda: self.num_workers_expected == self._get_num_workers_running(), @@ -323,7 +323,7 @@ def _check_workers(self) -> None: @cli_utils.action_cli @providers_configuration_loaded def webserver(args): - """Starts Airflow Webserver.""" + """Start Airflow Webserver.""" print(settings.HEADER) # Check for old/insecure config, and fail safe (i.e. don't launch) if the config is wildly insecure. 
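The CLI-command hunks above all apply the same change: the opening line of each docstring is rewritten into the imperative mood, which is what pydocstyle rule D401 ("First line should be in imperative mood") enforces. A minimal sketch of the pattern, taking the webserver docstring from the hunk above; the wrapper function names are placeholders rather than Airflow code, and the linter mentioned in the comment is an assumption about the project's tooling, not something stated in this patch:

    # Sketch only: how a D401-aware linter (for example pydocstyle or ruff with
    # rule D401 selected) treats the two docstring styles seen in this series.

    def start_webserver_before(args):
        """Starts Airflow Webserver."""  # third-person summary, flagged by D401

    def start_webserver_after(args):
        """Start Airflow Webserver."""  # imperative mood, passes D401

Summaries that describe when a hook fires rather than what a command does are reworded to verbs such as "Execute when ..." in later hunks, which satisfies the same rule.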
diff --git a/airflow/cli/simple_table.py b/airflow/cli/simple_table.py index b9338eb6a38d..f4f418742bfd 100644 --- a/airflow/cli/simple_table.py +++ b/airflow/cli/simple_table.py @@ -48,17 +48,17 @@ def __init__(self, show_header: bool = True, *args, **kwargs): self.show_header = show_header def print_as_json(self, data: dict): - """Renders dict as json text representation.""" + """Render dict as json text representation.""" json_content = json.dumps(data) self.print(Syntax(json_content, "json", theme="ansi_dark"), soft_wrap=True) def print_as_yaml(self, data: dict): - """Renders dict as yaml text representation.""" + """Render dict as yaml text representation.""" yaml_content = yaml.dump(data) self.print(Syntax(yaml_content, "yaml", theme="ansi_dark"), soft_wrap=True) def print_as_table(self, data: list[dict]): - """Renders list of dictionaries as table.""" + """Render list of dictionaries as table.""" if not data: self.print("No data found") return @@ -72,7 +72,7 @@ def print_as_table(self, data: list[dict]): self.print(table) def print_as_plain_table(self, data: list[dict]): - """Renders list of dictionaries as a simple table than can be easily piped.""" + """Render list of dictionaries as a simple table than can be easily piped.""" if not data: self.print("No data found") return @@ -99,7 +99,7 @@ def print_as( output: str, mapper: Callable[[Any], dict] | None = None, ) -> None: - """Prints provided using format specified by output argument.""" + """Print provided using format specified by output argument.""" output_to_renderer: dict[str, Callable[[Any], None]] = { "json": self.print_as_json, "yaml": self.print_as_yaml, diff --git a/airflow/dag_processing/manager.py b/airflow/dag_processing/manager.py index 04697650db03..534763843791 100644 --- a/airflow/dag_processing/manager.py +++ b/airflow/dag_processing/manager.py @@ -167,10 +167,10 @@ def start(self) -> None: def run_single_parsing_loop(self) -> None: """ - Should only be used when launched DAG file processor manager in sync mode. - Send agent heartbeat signal to the manager, requesting that it runs one processing "loop". + Should only be used when launched DAG file processor manager in sync mode. + Call wait_until_finished to ensure that any launched processors have finished before continuing. 
""" if not self._parent_signal_conn or not self._process: @@ -186,13 +186,13 @@ def run_single_parsing_loop(self) -> None: pass def get_callbacks_pipe(self) -> MultiprocessingConnection: - """Returns the pipe for sending Callbacks to DagProcessorManager.""" + """Return the pipe for sending Callbacks to DagProcessorManager.""" if not self._parent_signal_conn: raise ValueError("Process not started.") return self._parent_signal_conn def wait_until_finished(self) -> None: - """Waits until DAG parsing is finished.""" + """Wait until DAG parsing is finished.""" if not self._parent_signal_conn: raise ValueError("Process not started.") if self._async_mode: @@ -466,7 +466,7 @@ def register_exit_signals(self): signal.signal(signal.SIGUSR2, signal.SIG_IGN) def _exit_gracefully(self, signum, frame): - """Helper method to clean up DAG file processors to avoid leaving orphan processes.""" + """Clean up DAG file processors to avoid leaving orphan processes.""" self.log.info("Exiting gracefully upon receiving signal %s", signum) self.log.debug("Current Stacktrace is: %s", "\n".join(map(str, inspect.stack()))) self.terminate() @@ -519,7 +519,7 @@ def deactivate_stale_dags( session: Session = NEW_SESSION, ): """ - Detects DAGs which are no longer present in files. + Detect DAGs which are no longer present in files. Deactivate them and remove them in the serialized_dag table. """ @@ -695,7 +695,7 @@ def _fetch_callbacks(self, max_callbacks: int, session: Session = NEW_SESSION): @retry_db_transaction def _fetch_callbacks_with_retries(self, max_callbacks: int, session: Session): - """Fetches callbacks from database and add them to the internal queue for execution.""" + """Fetch callbacks from database and add them to the internal queue for execution.""" self.log.debug("Fetching callbacks from the database.") with prohibit_commit(session) as guard: query = session.query(DbCallbackRequest) @@ -811,7 +811,7 @@ def _print_stat(self): @provide_session def clear_nonexistent_import_errors(file_paths: list[str] | None, session=NEW_SESSION): """ - Clears import errors for files that no longer exist. + Clear import errors for files that no longer exist. :param file_paths: list of paths to DAG definition files :param session: session for ORM operations @@ -960,7 +960,7 @@ def get_start_time(self, file_path) -> datetime | None: def get_run_count(self, file_path) -> int: """ - The number of times the given file has been parsed. + Return the number of times the given file has been parsed. :param file_path: the path to the file that's being processed. 
""" @@ -968,7 +968,7 @@ def get_run_count(self, file_path) -> int: return stat.run_count if stat else 0 def get_dag_directory(self) -> str: - """Returns the dag_director as a string.""" + """Return the dag_director as a string.""" if isinstance(self._dag_directory, Path): return str(self._dag_directory.resolve()) else: @@ -1063,7 +1063,7 @@ def collect_results(self) -> None: @staticmethod def _create_process(file_path, pickle_dags, dag_ids, dag_directory, callback_requests): - """Creates DagFileProcessorProcess instance.""" + """Create DagFileProcessorProcess instance.""" return DagFileProcessorProcess( file_path=file_path, pickle_dags=pickle_dags, @@ -1240,7 +1240,7 @@ def _kill_timed_out_processors(self): self._processors.pop(proc) def _add_paths_to_queue(self, file_paths_to_enqueue: list[str], add_at_front: bool): - """Adds stuff to the back or front of the file queue, unless it's already present.""" + """Add stuff to the back or front of the file queue, unless it's already present.""" new_file_paths = list(p for p in file_paths_to_enqueue if p not in self._file_path_queue) if add_at_front: self._file_path_queue.extendleft(new_file_paths) @@ -1260,7 +1260,7 @@ def max_runs_reached(self): return True def terminate(self): - """Stops all running processors.""" + """Stop all running processors.""" for processor in self._processors.values(): Stats.decr( "dag_processing.processes", tags={"file_path": processor.file_path, "action": "terminate"} diff --git a/airflow/dag_processing/processor.py b/airflow/dag_processing/processor.py index 8b0ecb364070..162fc5889c89 100644 --- a/airflow/dag_processing/processor.py +++ b/airflow/dag_processing/processor.py @@ -415,7 +415,7 @@ def __init__(self, dag_ids: list[str] | None, dag_directory: str, log: logging.L @provide_session def manage_slas(cls, dag_folder, dag_id: str, session: Session = NEW_SESSION) -> None: """ - Finding all tasks that have SLAs defined, and sending alert emails when needed. + Find all tasks that have SLAs defined, and send alert emails when needed. New SLA misses are also recorded in the database. @@ -645,7 +645,7 @@ def update_import_errors( @provide_session def _validate_task_pools(self, *, dagbag: DagBag, session: Session = NEW_SESSION): - """Validates and raise exception if any task in a dag is using a non-existent pool.""" + """Validate and raise exception if any task in a dag is using a non-existent pool.""" from airflow.models.pool import Pool def check_pools(dag): diff --git a/airflow/decorators/base.py b/airflow/decorators/base.py index af37f191be2d..cf8e650034f9 100644 --- a/airflow/decorators/base.py +++ b/airflow/decorators/base.py @@ -223,7 +223,7 @@ def execute(self, context: Context): def _handle_output(self, return_value: Any, context: Context, xcom_push: Callable): """ - Handles logic for whether a decorator needs to push a single return value or multiple return values. + Handle logic for whether a decorator needs to push a single return value or multiple return values. It sets outlets if any datasets are found in the returned value(s) diff --git a/airflow/decorators/branch_python.py b/airflow/decorators/branch_python.py index 4dcff0a361f9..39105b007275 100644 --- a/airflow/decorators/branch_python.py +++ b/airflow/decorators/branch_python.py @@ -33,7 +33,7 @@ def branch_task( python_callable: Callable | None = None, multiple_outputs: bool | None = None, **kwargs ) -> TaskDecorator: """ - Wraps a python function into a BranchPythonOperator. + Wrap a python function into a BranchPythonOperator. 
For more information on how to use this operator, take a look at the guide: :ref:`concepts:branching` diff --git a/airflow/decorators/external_python.py b/airflow/decorators/external_python.py index 1f083a144a77..f17a9f3f1caf 100644 --- a/airflow/decorators/external_python.py +++ b/airflow/decorators/external_python.py @@ -35,7 +35,8 @@ def external_python_task( multiple_outputs: bool | None = None, **kwargs, ) -> TaskDecorator: - """Wraps a callable into an Airflow operator to run via a Python virtual environment. + """ + Wrap a callable into an Airflow operator to run via a Python virtual environment. Accepts kwargs for operator kwarg. Can be reused in a single DAG. diff --git a/airflow/decorators/python.py b/airflow/decorators/python.py index f79631da57af..d4423a2092b6 100644 --- a/airflow/decorators/python.py +++ b/airflow/decorators/python.py @@ -60,7 +60,8 @@ def python_task( multiple_outputs: bool | None = None, **kwargs, ) -> TaskDecorator: - """Wraps a function into an Airflow operator. + """ + Wrap a function into an Airflow operator. Accepts kwargs for operator kwarg. Can be reused in a single DAG. diff --git a/airflow/decorators/python_virtualenv.py b/airflow/decorators/python_virtualenv.py index 123378d43c2c..9b632e0c87bb 100644 --- a/airflow/decorators/python_virtualenv.py +++ b/airflow/decorators/python_virtualenv.py @@ -34,7 +34,8 @@ def virtualenv_task( multiple_outputs: bool | None = None, **kwargs, ) -> TaskDecorator: - """Wraps a callable into an Airflow operator to run via a Python virtual environment. + """ + Wrap a callable into an Airflow operator to run via a Python virtual environment. Accepts kwargs for operator kwarg. Can be reused in a single DAG. diff --git a/airflow/decorators/sensor.py b/airflow/decorators/sensor.py index f25d1610fe19..e8363317db8e 100644 --- a/airflow/decorators/sensor.py +++ b/airflow/decorators/sensor.py @@ -60,7 +60,7 @@ def __init__( def sensor_task(python_callable: Callable | None = None, **kwargs) -> TaskDecorator: """ - Wraps a function into an Airflow operator. + Wrap a function into an Airflow operator. Accepts kwargs for operator kwarg. Can be reused in a single DAG. :param python_callable: Function to decorate diff --git a/airflow/decorators/short_circuit.py b/airflow/decorators/short_circuit.py index b6b2de563288..dd94daddd6a3 100644 --- a/airflow/decorators/short_circuit.py +++ b/airflow/decorators/short_circuit.py @@ -34,7 +34,8 @@ def short_circuit_task( multiple_outputs: bool | None = None, **kwargs, ) -> TaskDecorator: - """Wraps a function into an ShortCircuitOperator. + """ + Wrap a function into an ShortCircuitOperator. Accepts kwargs for operator kwarg. Can be reused in a single DAG. From 85d75a2f7883a924d554f017971bc36dae76ec93 Mon Sep 17 00:00:00 2001 From: "D. 
Ferruzzi" Date: Sat, 12 Aug 2023 02:49:17 -0700 Subject: [PATCH 040/117] D401 Support - airflow/example_dags thru airflow/listeners (#33336) (cherry picked from commit d0c94d6bee2a9494e44f29c2c242c956877e9619) --- airflow/executors/base_executor.py | 18 ++++++------ airflow/executors/executor_loader.py | 13 +++++---- airflow/executors/local_executor.py | 22 ++++++++------ airflow/hooks/base.py | 6 ++-- airflow/hooks/filesystem.py | 2 +- airflow/hooks/subprocess.py | 2 +- airflow/jobs/backfill_job_runner.py | 2 +- airflow/jobs/base_job_runner.py | 12 ++++++-- airflow/jobs/job.py | 12 ++++---- airflow/jobs/local_task_job_runner.py | 4 +-- airflow/jobs/scheduler_job_runner.py | 10 +++---- airflow/jobs/triggerer_job_runner.py | 29 +++++++++---------- .../pre_7_4_0_compatibility/kube_client.py | 4 +-- .../pre_7_4_0_compatibility/pod_generator.py | 8 ++--- .../pod_generator_deprecated.py | 6 ++-- .../pre_7_4_0_compatibility/secret.py | 6 ++-- airflow/lineage/__init__.py | 4 +-- airflow/lineage/backend.py | 2 +- airflow/listeners/spec/dagrun.py | 6 ++-- airflow/listeners/spec/lifecycle.py | 4 +-- airflow/listeners/spec/taskinstance.py | 6 ++-- 21 files changed, 94 insertions(+), 84 deletions(-) diff --git a/airflow/executors/base_executor.py b/airflow/executors/base_executor.py index 97de6be1faca..81c441b521f8 100644 --- a/airflow/executors/base_executor.py +++ b/airflow/executors/base_executor.py @@ -188,7 +188,7 @@ def queue_task_instance( def has_task(self, task_instance: TaskInstance) -> bool: """ - Checks if a task is either queued or running in this executor. + Check if a task is either queued or running in this executor. :param task_instance: TaskInstance :return: True if the task is known to this executor @@ -250,7 +250,7 @@ def order_queued_tasks_by_priority(self) -> list[tuple[TaskInstanceKey, QueuedTa def trigger_tasks(self, open_slots: int) -> None: """ - Initiates async execution of the queued tasks, up to the number of available slots. + Initiate async execution of the queued tasks, up to the number of available slots. :param open_slots: Number of open slots """ @@ -298,7 +298,7 @@ def _process_tasks(self, task_tuples: list[TaskTuple]) -> None: def change_state(self, key: TaskInstanceKey, state: TaskInstanceState, info=None) -> None: """ - Changes state of the task. + Change state of the task. :param info: Executor information for the task instance :param key: Unique key for the task instance @@ -358,7 +358,7 @@ def execute_async( executor_config: Any | None = None, ) -> None: # pragma: no cover """ - This method will execute the command asynchronously. + Execute the command asynchronously. :param key: Unique key for the task instance :param command: Command to run @@ -369,7 +369,7 @@ def execute_async( def get_task_log(self, ti: TaskInstance, try_number: int) -> tuple[list[str], list[str]]: """ - This method can be implemented by any child class to return the task logs. + Return the task logs. 
:param ti: A TaskInstance object :param try_number: current try_number to read log from @@ -382,7 +382,7 @@ def end(self) -> None: # pragma: no cover raise NotImplementedError() def terminate(self): - """This method is called when the daemon receives a SIGTERM.""" + """Get called when the daemon receives a SIGTERM.""" raise NotImplementedError() def cleanup_stuck_queued_tasks(self, tis: list[TaskInstance]) -> list[str]: # pragma: no cover @@ -458,7 +458,7 @@ def validate_airflow_tasks_run_command(command: list[str]) -> tuple[str | None, return None, None def debug_dump(self): - """Called in response to SIGUSR2 by the scheduler.""" + """Get called in response to SIGUSR2 by the scheduler.""" self.log.info( "executor.queued (%d)\n\t%s", len(self.queued_tasks), @@ -472,7 +472,7 @@ def debug_dump(self): ) def send_callback(self, request: CallbackRequest) -> None: - """Sends callback for execution. + """Send callback for execution. Provides a default implementation which sends the callback to the `callback_sink` object. @@ -493,7 +493,7 @@ def get_cli_commands() -> list[GroupCommand]: @classmethod def _get_parser(cls) -> argparse.ArgumentParser: - """This method is used by Sphinx argparse to generate documentation. + """Generate documentation; used by Sphinx argparse. :meta private: """ diff --git a/airflow/executors/executor_loader.py b/airflow/executors/executor_loader.py index 4a4bda85831a..73c1149515d0 100644 --- a/airflow/executors/executor_loader.py +++ b/airflow/executors/executor_loader.py @@ -72,7 +72,7 @@ class ExecutorLoader: @classmethod def get_default_executor_name(cls) -> str: - """Returns the default executor name from Airflow configuration. + """Return the default executor name from Airflow configuration. :return: executor name from Airflow configuration """ @@ -82,7 +82,7 @@ def get_default_executor_name(cls) -> str: @classmethod def get_default_executor(cls) -> BaseExecutor: - """Creates a new instance of the configured executor if none exists and returns it.""" + """Create a new instance of the configured executor if none exists and returns it.""" if cls._default_executor is not None: return cls._default_executor @@ -91,7 +91,7 @@ def get_default_executor(cls) -> BaseExecutor: @classmethod def load_executor(cls, executor_name: str) -> BaseExecutor: """ - Loads the executor. + Load the executor. This supports the following formats: * by executor name for core executor @@ -123,7 +123,7 @@ def import_executor_cls( cls, executor_name: str, validate: bool = True ) -> tuple[type[BaseExecutor], ConnectorSource]: """ - Imports the executor class. + Import the executor class. Supports the same formats as ExecutorLoader.load_executor. @@ -159,7 +159,7 @@ def _import_and_validate(path: str) -> type[BaseExecutor]: @classmethod def import_default_executor_cls(cls, validate: bool = True) -> tuple[type[BaseExecutor], ConnectorSource]: """ - Imports the default executor class. + Import the default executor class. :param validate: Whether or not to validate the executor before returning @@ -172,7 +172,8 @@ def import_default_executor_cls(cls, validate: bool = True) -> tuple[type[BaseEx @classmethod @functools.lru_cache(maxsize=None) def validate_database_executor_compatibility(cls, executor: type[BaseExecutor]) -> None: - """Validate database and executor compatibility. + """ + Validate database and executor compatibility. Most of the databases work universally, but SQLite can only work with single-threaded executors (e.g. Sequential). 
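For context on the executor_loader.py docstrings above, ExecutorLoader exposes a small classmethod-only API. The names and signatures in the sketch below come straight from the hunk; the configuration detail in the comments (the [core] executor option) and the dotted-path format for custom executors are assumptions about how the loader is typically used, not something this patch spells out:

    # Hedged usage sketch; assumes a configured Airflow environment is importable.
    from airflow.executors.executor_loader import ExecutorLoader

    # Default executor name as read from Airflow configuration
    # (assumed to be the [core] executor option).
    name = ExecutorLoader.get_default_executor_name()

    # load_executor accepts a core executor name (or, presumably, a dotted path
    # to a custom executor class) and returns a BaseExecutor instance.
    executor = ExecutorLoader.load_executor(name)

    # import_executor_cls returns the class together with its ConnectorSource;
    # with validate=True it also checks database compatibility, for example that
    # SQLite is only used with single-threaded executors, as the docstring notes.
    executor_cls, source = ExecutorLoader.import_executor_cls(name, validate=True)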
diff --git a/airflow/executors/local_executor.py b/airflow/executors/local_executor.py index 7f83f8c7a255..cf88ca13b2f5 100644 --- a/airflow/executors/local_executor.py +++ b/airflow/executors/local_executor.py @@ -75,7 +75,7 @@ def run(self): def execute_work(self, key: TaskInstanceKey, command: CommandType) -> None: """ - Executes command received and stores result state in queue. + Execute command received and stores result state in queue. :param key: the key to identify the task instance :param command: the command to execute @@ -141,7 +141,7 @@ def _execute_work_in_fork(self, command: CommandType) -> TaskInstanceState: @abstractmethod def do_work(self): - """Called in the subprocess and should then execute tasks.""" + """Execute tasks; called in the subprocess.""" raise NotImplementedError() @@ -236,7 +236,7 @@ def __init__(self, executor: LocalExecutor): self.executor: LocalExecutor = executor def start(self) -> None: - """Starts the executor.""" + """Start the executor.""" self.executor.workers_used = 0 self.executor.workers_active = 0 @@ -248,7 +248,7 @@ def execute_async( executor_config: Any | None = None, ) -> None: """ - Executes task asynchronously. + Execute task asynchronously. :param key: the key to identify the task instance :param command: the command to execute @@ -291,7 +291,7 @@ def __init__(self, executor: LocalExecutor): self.queue: Queue[ExecutorWorkType] | None = None def start(self) -> None: - """Starts limited parallelism implementation.""" + """Start limited parallelism implementation.""" if TYPE_CHECKING: assert self.executor.manager assert self.executor.result_queue @@ -315,7 +315,7 @@ def execute_async( executor_config: Any | None = None, ) -> None: """ - Executes task asynchronously. + Execute task asynchronously. :param key: the key to identify the task instance :param command: the command to execute @@ -340,7 +340,11 @@ def sync(self): break def end(self): - """Ends the executor. Sends the poison pill to all workers.""" + """ + End the executor. + + Sends the poison pill to all workers. + """ for _ in self.executor.workers: self.queue.put((None, None)) @@ -349,7 +353,7 @@ def end(self): self.executor.sync() def start(self) -> None: - """Starts the executor.""" + """Start the executor.""" old_proctitle = getproctitle() setproctitle("airflow executor -- LocalExecutor") self.manager = Manager() @@ -389,7 +393,7 @@ def sync(self) -> None: self.impl.sync() def end(self) -> None: - """Ends the executor.""" + """End the executor.""" if TYPE_CHECKING: assert self.impl assert self.manager diff --git a/airflow/hooks/base.py b/airflow/hooks/base.py index c1c758a75615..0974813c5a7a 100644 --- a/airflow/hooks/base.py +++ b/airflow/hooks/base.py @@ -76,7 +76,7 @@ def get_connection(cls, conn_id: str) -> Connection: @classmethod def get_hook(cls, conn_id: str) -> BaseHook: """ - Returns default hook for this connection id. + Return default hook for this connection id. :param conn_id: connection id :return: default hook for this connection @@ -85,7 +85,7 @@ def get_hook(cls, conn_id: str) -> BaseHook: return connection.get_hook() def get_conn(self) -> Any: - """Returns connection for the hook.""" + """Return connection for the hook.""" raise NotImplementedError() @classmethod @@ -144,7 +144,7 @@ def get_ui_field_behaviour(cls): @staticmethod def get_connection_form_widgets() -> dict[str, Any]: """ - Returns dictionary of widgets to be added for the hook to handle extra values. + Return dictionary of widgets to be added for the hook to handle extra values. 
If you have class hierarchy, usually the widgets needed by your class are already added by the base class, so there is no need to implement this method. It might diff --git a/airflow/hooks/filesystem.py b/airflow/hooks/filesystem.py index 39517e8cdc1a..84b50aea6a39 100644 --- a/airflow/hooks/filesystem.py +++ b/airflow/hooks/filesystem.py @@ -22,7 +22,7 @@ class FSHook(BaseHook): """ - Allows for interaction with an file server. + Allow for interaction with an file server. Connection should have a name and a path specified under extra: diff --git a/airflow/hooks/subprocess.py b/airflow/hooks/subprocess.py index af901789e433..051b4cf66280 100644 --- a/airflow/hooks/subprocess.py +++ b/airflow/hooks/subprocess.py @@ -100,7 +100,7 @@ def pre_exec(): return SubprocessResult(exit_code=return_code, output=line) def send_sigterm(self): - """Sends SIGTERM signal to ``self.sub_process`` if one exists.""" + """Send SIGTERM signal to ``self.sub_process`` if one exists.""" self.log.info("Sending SIGTERM signal to process group") if self.sub_process and hasattr(self.sub_process, "pid"): os.killpg(os.getpgid(self.sub_process.pid), signal.SIGTERM) diff --git a/airflow/jobs/backfill_job_runner.py b/airflow/jobs/backfill_job_runner.py index 3d2c20c61253..2508ba69ab3b 100644 --- a/airflow/jobs/backfill_job_runner.py +++ b/airflow/jobs/backfill_job_runner.py @@ -170,7 +170,7 @@ def __init__( def _update_counters(self, ti_status: _DagRunTaskStatus, session: Session) -> None: """ - Updates the counters per state of the tasks that were running. + Update the counters per state of the tasks that were running. Can re-add to tasks to run when required. diff --git a/airflow/jobs/base_job_runner.py b/airflow/jobs/base_job_runner.py index fd3060db813c..611579b239e9 100644 --- a/airflow/jobs/base_job_runner.py +++ b/airflow/jobs/base_job_runner.py @@ -46,7 +46,9 @@ def __init__(self, job: J) -> None: def _execute(self) -> int | None: """ - Executes the logic connected to the runner. This method should be overridden by subclasses. + Execute the logic connected to the runner. + + This method should be overridden by subclasses. :meta private: :return: return code if available, otherwise None @@ -55,12 +57,16 @@ def _execute(self) -> int | None: @provide_session def heartbeat_callback(self, session: Session = NEW_SESSION) -> None: - """Callback that is called during heartbeat. This method can be overwritten by the runners.""" + """ + Execute callback during heartbeat. + + This method can be overwritten by the runners. 
+ """ @classmethod @provide_session def most_recent_job(cls, session: Session = NEW_SESSION) -> Job | None: - """Returns the most recent job of this type, if any, based on last heartbeat received.""" + """Return the most recent job of this type, if any, based on last heartbeat received.""" from airflow.jobs.job import most_recent_job return most_recent_job(cls.job_type, session=session) diff --git a/airflow/jobs/job.py b/airflow/jobs/job.py index 264eed15aa9f..f20808868db1 100644 --- a/airflow/jobs/job.py +++ b/airflow/jobs/job.py @@ -147,7 +147,7 @@ def is_alive(self, grace_multiplier=2.1): @provide_session def kill(self, session: Session = NEW_SESSION) -> NoReturn: - """Handles on_kill callback and updates state in database.""" + """Handle on_kill callback and updates state in database.""" job = session.scalar(select(Job).where(Job.id == self.id).limit(1)) job.end_date = timezone.utcnow() try: @@ -222,7 +222,7 @@ def heartbeat( @provide_session def prepare_for_execution(self, session: Session = NEW_SESSION): - """Prepares the job for execution.""" + """Prepare the job for execution.""" Stats.incr(self.__class__.__name__.lower() + "_start", 1, 1) self.state = JobState.RUNNING self.start_date = timezone.utcnow() @@ -240,7 +240,7 @@ def complete_execution(self, session: Session = NEW_SESSION): @provide_session def most_recent_job(self, session: Session = NEW_SESSION) -> Job | None: - """Returns the most recent job of this type, if any, based on last heartbeat received.""" + """Return the most recent job of this type, if any, based on last heartbeat received.""" return most_recent_job(self.job_type, session=session) @@ -272,7 +272,7 @@ def run_job( job: Job | JobPydantic, execute_callable: Callable[[], int | None], session: Session = NEW_SESSION ) -> int | None: """ - Runs the job. + Run the job. The Job is always an ORM object and setting the state is happening within the same DB session and the session is kept open throughout the whole execution. @@ -293,7 +293,7 @@ def run_job( def execute_job(job: Job | JobPydantic, execute_callable: Callable[[], int | None]) -> int | None: """ - Executes the job. + Execute the job. Job execution requires no session as generally executing session does not require an active database connection. The session might be temporary acquired and used if the job @@ -331,7 +331,7 @@ def perform_heartbeat( job: Job | JobPydantic, heartbeat_callback: Callable[[Session], None], only_if_necessary: bool ) -> None: """ - Performs heartbeat for the Job passed to it,optionally checking if it is necessary. + Perform heartbeat for the Job passed to it,optionally checking if it is necessary. :param job: job to perform heartbeat for :param heartbeat_callback: callback to run by the heartbeat diff --git a/airflow/jobs/local_task_job_runner.py b/airflow/jobs/local_task_job_runner.py index 6184a3e7fc42..c14250463932 100644 --- a/airflow/jobs/local_task_job_runner.py +++ b/airflow/jobs/local_task_job_runner.py @@ -111,13 +111,13 @@ def _execute(self) -> int | None: self.task_runner = get_task_runner(self) def signal_handler(signum, frame): - """Setting kill signal handler.""" + """Set kill signal handler.""" self.log.error("Received SIGTERM. 
Terminating subprocesses") self.task_runner.terminate() self.handle_task_exit(128 + signum) def segfault_signal_handler(signum, frame): - """Setting sigmentation violation signal handler.""" + """Set sigmentation violation signal handler.""" self.log.critical(SIGSEGV_MESSAGE) self.task_runner.terminate() self.handle_task_exit(128 + signum) diff --git a/airflow/jobs/scheduler_job_runner.py b/airflow/jobs/scheduler_job_runner.py index 0ed7aae5ceb2..ffcaf4f1a384 100644 --- a/airflow/jobs/scheduler_job_runner.py +++ b/airflow/jobs/scheduler_job_runner.py @@ -240,7 +240,7 @@ def register_signals(self) -> None: signal.signal(signal.SIGUSR2, self._debug_dump) def _exit_gracefully(self, signum: int, frame: FrameType | None) -> None: - """Helper method to clean up processor_agent to avoid leaving orphan processes.""" + """Clean up processor_agent to avoid leaving orphan processes.""" if not _is_parent_process(): # Only the parent process should perform the cleanup. return @@ -905,7 +905,7 @@ def _update_dag_run_state_for_paused_dags(self, session: Session = NEW_SESSION) def _run_scheduler_loop(self) -> None: """ - The actual scheduler loop. + Harvest DAG parsing results, queue tasks, and perform executor heartbeat; the actual scheduler loop. The main steps in the loop are: #. Harvest DAG parsing results through DagFileProcessorAgent @@ -1021,7 +1021,7 @@ def _run_scheduler_loop(self) -> None: def _do_scheduling(self, session: Session) -> int: """ - This function is where the main scheduling decisions take places. + Make the main scheduling decisions. It: - Creates any necessary DAG runs by examining the next_dagrun_create_after column of DagModel @@ -1378,7 +1378,7 @@ def _schedule_all_dag_runs( dag_runs: Iterable[DagRun], session: Session, ) -> list[tuple[DagRun, DagCallbackRequest | None]]: - """Makes scheduling decisions for all `dag_runs`.""" + """Make scheduling decisions for all `dag_runs`.""" callback_tuples = [(run, self._schedule_dag_run(run, session=session)) for run in dag_runs] guard.commit() return callback_tuples @@ -1504,7 +1504,7 @@ def _send_dag_callbacks_to_processor(self, dag: DAG, callback: DagCallbackReques self.log.debug("callback is empty") def _send_sla_callbacks_to_processor(self, dag: DAG) -> None: - """Sends SLA Callbacks to DagFileProcessor if tasks have SLAs set and check_slas=True.""" + """Send SLA Callbacks to DagFileProcessor if tasks have SLAs set and check_slas=True.""" if not settings.CHECK_SLAS: return diff --git a/airflow/jobs/triggerer_job_runner.py b/airflow/jobs/triggerer_job_runner.py index 9f8dac6184d9..0cc1200e8a9a 100644 --- a/airflow/jobs/triggerer_job_runner.py +++ b/airflow/jobs/triggerer_job_runner.py @@ -289,14 +289,18 @@ def register_signals(self) -> None: @provide_session def is_needed(cls, session) -> bool: """ - Tests if the triggerer job needs to be run (i.e., if there are triggers in the trigger table). + Test if the triggerer job needs to be run (i.e., if there are triggers in the trigger table). This is used for the warning boxes in the UI. """ return session.query(func.count(Trigger.id)).scalar() > 0 def on_kill(self): - """Called when there is an external kill command (via the heartbeat mechanism, for example).""" + """ + Stop the trigger runner. + + Called when there is an external kill command (via the heartbeat mechanism, for example). 
+ """ self.trigger_runner.stop = True def _kill_listener(self): @@ -306,7 +310,7 @@ def _kill_listener(self): self.listener.stop() def _exit_gracefully(self, signum, frame) -> None: - """Helper method to clean up processor_agent to avoid leaving orphan processes.""" + """Clean up processor_agent to avoid leaving orphan processes.""" # The first time, try to exit nicely if not self.trigger_runner.stop: self.log.info("Exiting gracefully upon receiving signal %s", signum) @@ -340,11 +344,7 @@ def _execute(self) -> int | None: return None def _run_trigger_loop(self) -> None: - """ - The main-thread trigger loop. - - This runs synchronously and handles all database reads/writes. - """ + """Run synchronously and handle all database reads/writes; the main-thread trigger loop.""" while not self.trigger_runner.stop: if not self.trigger_runner.is_alive(): self.log.error("Trigger runner thread has died! Exiting.") @@ -381,7 +381,7 @@ def handle_events(self): def handle_failed_triggers(self): """ - Handles "failed" triggers. - ones that errored or exited before they sent an event. + Handle "failed" triggers. - ones that errored or exited before they sent an event. Task Instances that depend on them need failing. """ @@ -449,15 +449,14 @@ def __init__(self): self.job_id = None def run(self): - """Sync entrypoint - just runs arun in an async loop.""" + """Sync entrypoint - just run a run in an async loop.""" asyncio.run(self.arun()) async def arun(self): """ - Main (asynchronous) logic loop. + Run trigger addition/deletion/cleanup; main (asynchronous) logic loop. - The loop in here runs trigger addition/deletion/cleanup. Actual - triggers run in their own separate coroutines. + Actual triggers run in their own separate coroutines. """ watchdog = asyncio.create_task(self.block_watchdog()) last_status = time.time() @@ -634,7 +633,7 @@ def mark_trigger_end(trigger): def update_triggers(self, requested_trigger_ids: set[int]): """ - Called from the main thread to request that we update what triggers we're running. + Request that we update what triggers we're running. Works out the differences - ones to add, and ones to remove - then adds them to the deques so the subthread can actually mutate the running @@ -701,7 +700,7 @@ def set_trigger_logging_metadata(self, ti: TaskInstance, trigger_id, trigger): def get_trigger_by_classpath(self, classpath: str) -> type[BaseTrigger]: """ - Gets a trigger class by its classpath ("path.to.module.classname"). + Get a trigger class by its classpath ("path.to.module.classname"). Uses a cache dictionary to speed up lookups after the first time. """ diff --git a/airflow/kubernetes/pre_7_4_0_compatibility/kube_client.py b/airflow/kubernetes/pre_7_4_0_compatibility/kube_client.py index d2e791dbfdf6..393a3ce94fad 100644 --- a/airflow/kubernetes/pre_7_4_0_compatibility/kube_client.py +++ b/airflow/kubernetes/pre_7_4_0_compatibility/kube_client.py @@ -52,7 +52,7 @@ def _disable_verify_ssl() -> None: def _enable_tcp_keepalive() -> None: """ - This function enables TCP keepalive mechanism. + Enable TCP keepalive mechanism. This prevents urllib3 connection to hang indefinitely when idle connection is time-outed on services like cloud load balancers or firewalls. @@ -95,7 +95,7 @@ def get_kube_client( config_file: str | None = None, ) -> client.CoreV1Api: """ - Retrieves Kubernetes client. + Retrieve Kubernetes client. 
:param in_cluster: whether we are in cluster :param cluster_context: context of the cluster diff --git a/airflow/kubernetes/pre_7_4_0_compatibility/pod_generator.py b/airflow/kubernetes/pre_7_4_0_compatibility/pod_generator.py index 9154e83fde86..1fe8d50d76c4 100644 --- a/airflow/kubernetes/pre_7_4_0_compatibility/pod_generator.py +++ b/airflow/kubernetes/pre_7_4_0_compatibility/pod_generator.py @@ -165,7 +165,7 @@ def __init__( self.extract_xcom = extract_xcom def gen_pod(self) -> k8s.V1Pod: - """Generates pod.""" + """Generate pod.""" warnings.warn("This function is deprecated. ", RemovedInAirflow3Warning) result = self.ud_pod @@ -178,7 +178,7 @@ def gen_pod(self) -> k8s.V1Pod: @staticmethod def add_xcom_sidecar(pod: k8s.V1Pod) -> k8s.V1Pod: - """Adds sidecar.""" + """Add sidecar.""" warnings.warn( "This function is deprecated. " "Please use airflow.providers.cncf.kubernetes.utils.xcom_sidecar.add_xcom_sidecar instead" @@ -194,7 +194,7 @@ def add_xcom_sidecar(pod: k8s.V1Pod) -> k8s.V1Pod: @staticmethod def from_obj(obj) -> dict | k8s.V1Pod | None: - """Converts to pod from obj.""" + """Convert to pod from obj.""" if obj is None: return None @@ -228,7 +228,7 @@ def from_obj(obj) -> dict | k8s.V1Pod | None: @staticmethod def from_legacy_obj(obj) -> k8s.V1Pod | None: - """Converts to pod from obj.""" + """Convert to pod from obj.""" if obj is None: return None diff --git a/airflow/kubernetes/pre_7_4_0_compatibility/pod_generator_deprecated.py b/airflow/kubernetes/pre_7_4_0_compatibility/pod_generator_deprecated.py index 8876556a8d74..df1b78c0252e 100644 --- a/airflow/kubernetes/pre_7_4_0_compatibility/pod_generator_deprecated.py +++ b/airflow/kubernetes/pre_7_4_0_compatibility/pod_generator_deprecated.py @@ -216,7 +216,7 @@ def __init__( self.extract_xcom = extract_xcom def gen_pod(self) -> k8s.V1Pod: - """Generates pod.""" + """Generate pod.""" result = None if result is None: @@ -234,7 +234,7 @@ def gen_pod(self) -> k8s.V1Pod: @staticmethod def add_sidecar(pod: k8s.V1Pod) -> k8s.V1Pod: - """Adds sidecar.""" + """Add sidecar.""" pod_cp = copy.deepcopy(pod) pod_cp.spec.volumes = pod.spec.volumes or [] pod_cp.spec.volumes.insert(0, PodDefaults.VOLUME) @@ -246,7 +246,7 @@ def add_sidecar(pod: k8s.V1Pod) -> k8s.V1Pod: @staticmethod def from_obj(obj) -> k8s.V1Pod | None: - """Converts to pod from obj.""" + """Convert to pod from obj.""" if obj is None: return None diff --git a/airflow/kubernetes/pre_7_4_0_compatibility/secret.py b/airflow/kubernetes/pre_7_4_0_compatibility/secret.py index 14295f5c7a89..b02bbb3dd70e 100644 --- a/airflow/kubernetes/pre_7_4_0_compatibility/secret.py +++ b/airflow/kubernetes/pre_7_4_0_compatibility/secret.py @@ -65,7 +65,7 @@ def __init__(self, deploy_type, deploy_target, secret, key=None, items=None): self.key = key def to_env_secret(self) -> k8s.V1EnvVar: - """Stores es environment secret.""" + """Store es environment secret.""" return k8s.V1EnvVar( name=self.deploy_target, value_from=k8s.V1EnvVarSource( @@ -74,11 +74,11 @@ def to_env_secret(self) -> k8s.V1EnvVar: ) def to_env_from_secret(self) -> k8s.V1EnvFromSource: - """Reads from environment to secret.""" + """Read from environment to secret.""" return k8s.V1EnvFromSource(secret_ref=k8s.V1SecretEnvSource(name=self.secret)) def to_volume_secret(self) -> tuple[k8s.V1Volume, k8s.V1VolumeMount]: - """Converts to volume secret.""" + """Convert to volume secret.""" vol_id = f"secretvol{uuid.uuid4()}" volume = k8s.V1Volume(name=vol_id, secret=k8s.V1SecretVolumeSource(secret_name=self.secret)) if self.items: diff 
--git a/airflow/lineage/__init__.py b/airflow/lineage/__init__.py index 22e7d82c092a..e22f264fdb4e 100644 --- a/airflow/lineage/__init__.py +++ b/airflow/lineage/__init__.py @@ -38,7 +38,7 @@ def get_backend() -> LineageBackend | None: - """Gets the lineage backend if defined in the configs.""" + """Get the lineage backend if defined in the configs.""" clazz = conf.getimport("lineage", "backend", fallback=None) if clazz: @@ -99,7 +99,7 @@ def wrapper(self, context, *args, **kwargs): def prepare_lineage(func: T) -> T: """ - Prepares the lineage inlets and outlets. + Prepare the lineage inlets and outlets. Inlets can be: diff --git a/airflow/lineage/backend.py b/airflow/lineage/backend.py index 29a755109c64..1ccfa78b890b 100644 --- a/airflow/lineage/backend.py +++ b/airflow/lineage/backend.py @@ -35,7 +35,7 @@ def send_lineage( context: dict | None = None, ): """ - Sends lineage metadata to a backend. + Send lineage metadata to a backend. :param operator: the operator executing a transformation on the inlets and outlets :param inlets: the inlets to this operator diff --git a/airflow/listeners/spec/dagrun.py b/airflow/listeners/spec/dagrun.py index d2ae1a6b78cb..3337f4b9a16c 100644 --- a/airflow/listeners/spec/dagrun.py +++ b/airflow/listeners/spec/dagrun.py @@ -29,14 +29,14 @@ @hookspec def on_dag_run_running(dag_run: DagRun, msg: str): - """Called when dag run state changes to RUNNING.""" + """Execute when dag run state changes to RUNNING.""" @hookspec def on_dag_run_success(dag_run: DagRun, msg: str): - """Called when dag run state changes to SUCCESS.""" + """Execute when dag run state changes to SUCCESS.""" @hookspec def on_dag_run_failed(dag_run: DagRun, msg: str): - """Called when dag run state changes to FAIL.""" + """Execute when dag run state changes to FAIL.""" diff --git a/airflow/listeners/spec/lifecycle.py b/airflow/listeners/spec/lifecycle.py index 6ab0aa3b5cde..c5e3bb52e4dd 100644 --- a/airflow/listeners/spec/lifecycle.py +++ b/airflow/listeners/spec/lifecycle.py @@ -25,7 +25,7 @@ @hookspec def on_starting(component): """ - Called before Airflow component - jobs like scheduler, worker, or task runner starts. + Execute before Airflow component - jobs like scheduler, worker, or task runner starts. It's guaranteed this will be called before any other plugin method. @@ -36,7 +36,7 @@ def on_starting(component): @hookspec def before_stopping(component): """ - Called before Airflow component - jobs like scheduler, worker, or task runner stops. + Execute before Airflow component - jobs like scheduler, worker, or task runner stops. It's guaranteed this will be called after any other plugin method. diff --git a/airflow/listeners/spec/taskinstance.py b/airflow/listeners/spec/taskinstance.py index b87043a99d8f..03f0a0047893 100644 --- a/airflow/listeners/spec/taskinstance.py +++ b/airflow/listeners/spec/taskinstance.py @@ -34,18 +34,18 @@ def on_task_instance_running( previous_state: TaskInstanceState | None, task_instance: TaskInstance, session: Session | None ): - """Called when task state changes to RUNNING. previous_state can be None.""" + """Execute when task state changes to RUNNING. previous_state can be None.""" @hookspec def on_task_instance_success( previous_state: TaskInstanceState | None, task_instance: TaskInstance, session: Session | None ): - """Called when task state changes to SUCCESS. previous_state can be None.""" + """Execute when task state changes to SUCCESS. 
previous_state can be None.""" @hookspec def on_task_instance_failed( previous_state: TaskInstanceState | None, task_instance: TaskInstance, session: Session | None ): - """Called when task state changes to FAIL. previous_state can be None.""" + """Execute when task state changes to FAIL. previous_state can be None.""" From 4c0158440fa99e122109b441dda375de19ea5ff3 Mon Sep 17 00:00:00 2001 From: "D. Ferruzzi" Date: Sat, 12 Aug 2023 11:53:19 -0700 Subject: [PATCH 041/117] D401 Support - Macros to Operators (Inclusive) (#33337) * D401 Support - airflow/macros thru airflow/operators * fix static checks (cherry picked from commit 2efb3a64f5446a5103c4107e4d2db4c629344faa) --- airflow/metrics/otel_logger.py | 18 ++++---- airflow/metrics/statsd_logger.py | 2 +- airflow/metrics/validators.py | 2 +- airflow/migrations/utils.py | 2 +- airflow/models/abstractoperator.py | 6 ++- airflow/models/baseoperator.py | 32 +++++++------- airflow/models/connection.py | 16 +++---- airflow/models/dag.py | 50 +++++++++++----------- airflow/models/dagbag.py | 8 ++-- airflow/models/dagcode.py | 12 +++--- airflow/models/dagrun.py | 22 +++++----- airflow/models/mappedoperator.py | 6 +-- airflow/models/param.py | 6 +-- airflow/models/serialized_dag.py | 12 +++--- airflow/models/skipmixin.py | 6 +-- airflow/models/taskinstance.py | 44 +++++++++---------- airflow/models/taskinstancekey.py | 2 +- airflow/models/taskmixin.py | 12 +++--- airflow/models/taskreschedule.py | 4 +- airflow/models/trigger.py | 8 ++-- airflow/models/variable.py | 8 ++-- airflow/models/xcom.py | 10 ++--- airflow/models/xcom_arg.py | 12 +++--- airflow/notifications/basenotifier.py | 2 +- airflow/operators/bash.py | 2 +- airflow/operators/datetime.py | 2 +- airflow/operators/python.py | 6 +-- docs/apache-airflow/img/airflow_erd.sha256 | 2 +- docs/apache-airflow/img/airflow_erd.svg | 8 ++-- 29 files changed, 163 insertions(+), 159 deletions(-) diff --git a/airflow/metrics/otel_logger.py b/airflow/metrics/otel_logger.py index 3e4701ca6077..8092fbe76e35 100644 --- a/airflow/metrics/otel_logger.py +++ b/airflow/metrics/otel_logger.py @@ -87,7 +87,7 @@ def _generate_key_name(name: str, attributes: Attributes = None): def name_is_otel_safe(prefix: str, name: str) -> bool: """ - Returns True if the provided name and prefix would result in a name that meets the OpenTelemetry standard. + Return True if the provided name and prefix would result in a name that meets the OpenTelemetry standard. Legal names are defined here: https://opentelemetry.io/docs/reference/specification/metrics/api/#instrument-name-syntax @@ -110,7 +110,7 @@ def _type_as_str(obj: Instrument) -> str: def _get_otel_safe_name(name: str) -> str: """ - Verifies that the provided name does not exceed OpenTelemetry's maximum length for metric names. + Verify that the provided name does not exceed OpenTelemetry's maximum length for metric names. :param name: The original metric name :returns: The name, truncated to an OTel-acceptable length if required. @@ -290,7 +290,7 @@ def clear(self) -> None: self.map.clear() def _create_counter(self, name): - """Creates a new counter or up_down_counter for the provided name.""" + """Create a new counter or up_down_counter for the provided name.""" otel_safe_name = _get_otel_safe_name(name) if _is_up_down_counter(name): @@ -303,7 +303,7 @@ def _create_counter(self, name): def get_counter(self, name: str, attributes: Attributes = None): """ - Returns the counter; creates a new one if it did not exist. 
+ Return the counter; creates a new one if it did not exist. :param name: The name of the counter to fetch or create. :param attributes: Counter attributes, used to generate a unique key to store the counter. @@ -315,7 +315,7 @@ def get_counter(self, name: str, attributes: Attributes = None): def del_counter(self, name: str, attributes: Attributes = None) -> None: """ - Deletes a counter. + Delete a counter. :param name: The name of the counter to delete. :param attributes: Counter attributes which were used to generate a unique key to store the counter. @@ -326,7 +326,7 @@ def del_counter(self, name: str, attributes: Attributes = None) -> None: def set_gauge_value(self, name: str, value: float | None, delta: bool, tags: Attributes): """ - Overrides the last reading for a Gauge with a new value. + Override the last reading for a Gauge with a new value. :param name: The name of the gauge to record. :param value: The new reading to record. @@ -344,7 +344,7 @@ def set_gauge_value(self, name: str, value: float | None, delta: bool, tags: Att def _create_gauge(self, name: str, attributes: Attributes = None): """ - Creates a new Observable Gauge with the provided name and the default value. + Create a new Observable Gauge with the provided name and the default value. :param name: The name of the gauge to fetch or create. :param attributes: Gauge attributes, used to generate a unique key to store the gauge. @@ -361,12 +361,12 @@ def _create_gauge(self, name: str, attributes: Attributes = None): return gauge def read_gauge(self, key: str, *args) -> Iterable[Observation]: - """Callback for the Observable Gauges, returns the Observation for the provided key.""" + """Return the Observation for the provided key; callback for the Observable Gauges.""" yield self.map[key] def poke_gauge(self, name: str, attributes: Attributes = None) -> GaugeValues: """ - Returns the value of the gauge; creates a new one with the default value if it did not exist. + Return the value of the gauge; creates a new one with the default value if it did not exist. :param name: The name of the gauge to fetch or create. :param attributes: Gauge attributes, used to generate a unique key to store the gauge. diff --git a/airflow/metrics/statsd_logger.py b/airflow/metrics/statsd_logger.py index f54e00f48898..2e69d6e60590 100644 --- a/airflow/metrics/statsd_logger.py +++ b/airflow/metrics/statsd_logger.py @@ -150,7 +150,7 @@ def timer( def get_statsd_logger(cls) -> SafeStatsdLogger: - """Returns logger for StatsD.""" + """Return logger for StatsD.""" # no need to check for the scheduler/statsd_on -> this method is only called when it is set # and previously it would crash with None is callable if it was called without it. from statsd import StatsClient diff --git a/airflow/metrics/validators.py b/airflow/metrics/validators.py index 0fd5fd1adef8..501229578d2b 100644 --- a/airflow/metrics/validators.py +++ b/airflow/metrics/validators.py @@ -107,7 +107,7 @@ def stat_name_otel_handler( max_length: int = OTEL_NAME_MAX_LENGTH, ) -> str: """ - Verifies that a proposed prefix and name combination will meet OpenTelemetry naming standards. + Verify that a proposed prefix and name combination will meet OpenTelemetry naming standards. 
See: https://opentelemetry.io/docs/reference/specification/metrics/api/#instrument-name-syntax diff --git a/airflow/migrations/utils.py b/airflow/migrations/utils.py index a5a65c6745a5..bc31c8f70c5e 100644 --- a/airflow/migrations/utils.py +++ b/airflow/migrations/utils.py @@ -24,7 +24,7 @@ def get_mssql_table_constraints(conn, table_name) -> dict[str, dict[str, list[str]]]: """ - Returns the primary and unique constraint along with column name. + Return the primary and unique constraint along with column name. Some tables like `task_instance` are missing the primary key constraint name and the name is auto-generated by the SQL server, so this function diff --git a/airflow/models/abstractoperator.py b/airflow/models/abstractoperator.py index 458d55ef5da1..ba357c0bd1bb 100644 --- a/airflow/models/abstractoperator.py +++ b/airflow/models/abstractoperator.py @@ -463,7 +463,8 @@ def get_extra_links(self, ti: TaskInstance, link_name: str) -> str | None: @cache def get_parse_time_mapped_ti_count(self) -> int: - """Number of mapped task instances that can be created on DAG run creation. + """ + Return the number of mapped task instances that can be created on DAG run creation. This only considers literal mapped arguments, and would return *None* when any non-literal values are used for mapping. @@ -479,7 +480,8 @@ def get_parse_time_mapped_ti_count(self) -> int: return group.get_parse_time_mapped_ti_count() def get_mapped_ti_count(self, run_id: str, *, session: Session) -> int: - """Number of mapped TaskInstances that can be created at run time. + """ + Return the number of mapped TaskInstances that can be created at run time. This considers both literal and non-literal mapped arguments, and the result is therefore available when all depended tasks have finished. The diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index 45462bf72619..47e6c07ee206 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -991,7 +991,7 @@ def __hash__(self): # including lineage information def __or__(self, other): """ - Called for [This Operator] | [Operator]. + Return [This Operator] | [Operator]. The inlets of other will be set to pick up the outlets from this operator. Other will be set as a downstream task of this operator. @@ -1010,7 +1010,7 @@ def __or__(self, other): def __gt__(self, other): """ - Called for [Operator] > [Outlet]. + Return [Operator] > [Outlet]. If other is an attr annotated object it is set as an outlet of this Operator. """ @@ -1026,7 +1026,7 @@ def __gt__(self, other): def __lt__(self, other): """ - Called for [Inlet] > [Operator] or [Operator] < [Inlet]. + Return [Inlet] > [Operator] or [Operator] < [Inlet]. If other is an attr annotated object it is set as an inlet to this operator. """ @@ -1054,22 +1054,22 @@ def __setattr__(self, key, value): self.set_xcomargs_dependencies() def add_inlets(self, inlets: Iterable[Any]): - """Sets inlets to this operator.""" + """Set inlets to this operator.""" self.inlets.extend(inlets) def add_outlets(self, outlets: Iterable[Any]): - """Defines the outlets of this operator.""" + """Define the outlets of this operator.""" self.outlets.extend(outlets) def get_inlet_defs(self): - """Gets inlet definitions on this task. + """Get inlet definitions on this task. :meta private: """ return self.inlets def get_outlet_defs(self): - """Gets outlet definitions on this task. + """Get outlet definitions on this task. 
:meta private: """ @@ -1109,7 +1109,7 @@ def dag(self, dag: DAG | None): self._dag = dag def has_dag(self): - """Returns True if the Operator has been assigned to a DAG.""" + """Return True if the Operator has been assigned to a DAG.""" return self._dag is not None deps: frozenset[BaseTIDep] = frozenset( @@ -1134,7 +1134,7 @@ def prepare_for_execution(self) -> BaseOperator: def set_xcomargs_dependencies(self) -> None: """ - Resolves upstream dependencies of a task. + Resolve upstream dependencies of a task. In this way passing an ``XComArg`` as value for a template field will result in creating upstream relation between two tasks. @@ -1163,13 +1163,13 @@ def set_xcomargs_dependencies(self) -> None: @prepare_lineage def pre_execute(self, context: Any): - """This hook is triggered right before self.execute() is called.""" + """Execute right before self.execute() is called.""" if self._pre_execute_hook is not None: self._pre_execute_hook(context) def execute(self, context: Context) -> Any: """ - This is the main method to derive when creating an operator. + Derive when creating an operator. Context is the same dictionary used as when rendering jinja templates. @@ -1180,7 +1180,7 @@ def execute(self, context: Context) -> Any: @apply_lineage def post_execute(self, context: Any, result: Any = None): """ - This hook is triggered right after self.execute() is called. + Execute right after self.execute() is called. It is passed the execution context and any results returned by the operator. """ @@ -1252,7 +1252,7 @@ def clear( downstream: bool = False, session: Session = NEW_SESSION, ): - """Clears the state of task instances associated with the task, following the parameters specified.""" + """Clear the state of task instances associated with the task, following the parameters specified.""" qry = select(TaskInstance).where(TaskInstance.dag_id == self.dag_id) if start_date: @@ -1355,7 +1355,7 @@ def run( ) def dry_run(self) -> None: - """Performs dry run for the operator - just render template fields.""" + """Perform dry run for the operator - just render template fields.""" self.log.info("Dry run") for field in self.template_fields: try: @@ -1563,7 +1563,7 @@ def get_serialized_fields(cls): return cls.__serialized_fields def serialize_for_task_group(self) -> tuple[DagAttributeTypes, Any]: - """Required by DAGNode.""" + """Serialize; required by DAGNode.""" return DagAttributeTypes.OP, self.task_id @property @@ -1837,7 +1837,7 @@ def cross_downstream( def chain_linear(*elements: DependencyMixin | Sequence[DependencyMixin]): """ - Helper to simplify task dependency definition. + Simplify task dependency definition. E.g.: suppose you want precedence like so:: diff --git a/airflow/models/connection.py b/airflow/models/connection.py index fe514cdf48c7..e0ba22edd79f 100644 --- a/airflow/models/connection.py +++ b/airflow/models/connection.py @@ -39,7 +39,7 @@ def parse_netloc_to_hostname(*args, **kwargs): - """This method is deprecated.""" + """Do not use, this method is deprecated.""" warnings.warn("This method is deprecated.", RemovedInAirflow3Warning) return _parse_netloc_to_hostname(*args, **kwargs) @@ -142,7 +142,7 @@ def __init__( @staticmethod def _validate_extra(extra, conn_id) -> None: """ - Here we verify that ``extra`` is a JSON-encoded Python dict. + Verify that ``extra`` is a JSON-encoded Python dict. From Airflow 3.0, we should no longer suppress these errors but raise instead. 
""" @@ -173,7 +173,7 @@ def on_db_load(self): mask_secret(self.password) def parse_from_uri(self, **uri): - """This method is deprecated. Please use uri parameter in constructor.""" + """Use uri parameter in constructor, this method is deprecated.""" warnings.warn( "This method is deprecated. Please use uri parameter in constructor.", RemovedInAirflow3Warning, @@ -219,7 +219,7 @@ def _parse_from_uri(self, uri: str): @staticmethod def _create_host(protocol, host) -> str | None: - """Returns the connection host with the protocol.""" + """Return the connection host with the protocol.""" if not host: return host if protocol: @@ -378,9 +378,9 @@ def __repr__(self): def log_info(self): """ - This method is deprecated. + Read each field individually or use the default representation (`__repr__`). - You can read each field individually or use the default representation (`__repr__`). + This method is deprecated. """ warnings.warn( "This method is deprecated. You can read each field individually or " @@ -396,9 +396,9 @@ def log_info(self): def debug_info(self): """ - This method is deprecated. + Read each field individually or use the default representation (`__repr__`). - You can read each field individually or use the default representation (`__repr__`). + This method is deprecated. """ warnings.warn( "This method is deprecated. You can read each field individually or " diff --git a/airflow/models/dag.py b/airflow/models/dag.py index f2d9b8e63d43..36555ce10fc8 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -219,7 +219,7 @@ def create_timetable(interval: ScheduleIntervalArg, timezone: Timezone) -> Timet def get_last_dagrun(dag_id, session, include_externally_triggered=False): """ - Returns the last dag run for a dag, None if there was none. + Return the last dag run for a dag, None if there was none. Last dag run can be any type of run e.g. scheduled or backfilled. Overridden DagRuns are ignored. @@ -825,7 +825,7 @@ def is_fixed_time_schedule(self): def following_schedule(self, dttm): """ - Calculates the following schedule for this dag in UTC. + Calculate the following schedule for this dag in UTC. :param dttm: utc datetime :return: utc datetime @@ -1103,7 +1103,7 @@ def iter_dagrun_infos_between( def get_run_dates(self, start_date, end_date=None) -> list: """ - Returns a list of dates between the interval received as parameter using this dag's schedule interval. + Return a list of dates between the interval received as parameter using this dag's schedule interval. Returned dates can be used for execution dates. @@ -1318,7 +1318,7 @@ def allow_future_exec_dates(self) -> bool: @provide_session def get_concurrency_reached(self, session=NEW_SESSION) -> bool: - """Returns a boolean indicating whether the max_active_tasks limit for this DAG has been reached.""" + """Return a boolean indicating whether the max_active_tasks limit for this DAG has been reached.""" TI = TaskInstance total_tasks = session.scalar( select(func.count(TI.task_id)).where( @@ -1330,7 +1330,7 @@ def get_concurrency_reached(self, session=NEW_SESSION) -> bool: @property def concurrency_reached(self): - """This attribute is deprecated. Please use `airflow.models.DAG.get_concurrency_reached` method.""" + """Use `airflow.models.DAG.get_concurrency_reached`, this attribute is deprecated.""" warnings.warn( "This attribute is deprecated. 
Please use `airflow.models.DAG.get_concurrency_reached` method.", RemovedInAirflow3Warning, @@ -1340,17 +1340,17 @@ def concurrency_reached(self): @provide_session def get_is_active(self, session=NEW_SESSION) -> None: - """Returns a boolean indicating whether this DAG is active.""" + """Return a boolean indicating whether this DAG is active.""" return session.scalar(select(DagModel.is_active).where(DagModel.dag_id == self.dag_id)) @provide_session def get_is_paused(self, session=NEW_SESSION) -> None: - """Returns a boolean indicating whether this DAG is paused.""" + """Return a boolean indicating whether this DAG is paused.""" return session.scalar(select(DagModel.is_paused).where(DagModel.dag_id == self.dag_id)) @property def is_paused(self): - """This attribute is deprecated. Please use `airflow.models.DAG.get_is_paused` method.""" + """Use `airflow.models.DAG.get_is_paused`, this attribute is deprecated.""" warnings.warn( "This attribute is deprecated. Please use `airflow.models.DAG.get_is_paused` method.", RemovedInAirflow3Warning, @@ -1408,7 +1408,7 @@ def handle_callback(self, dagrun, success=True, reason=None, session=NEW_SESSION def get_active_runs(self): """ - Returns a list of dag run execution dates currently running. + Return a list of dag run execution dates currently running. :return: List of execution dates """ @@ -1423,7 +1423,7 @@ def get_active_runs(self): @provide_session def get_num_active_runs(self, external_trigger=None, only_running=True, session=NEW_SESSION): """ - Returns the number of active "running" dag runs. + Return the number of active "running" dag runs. :param external_trigger: True for externally triggered active dag runs :param session: @@ -1450,7 +1450,7 @@ def get_dagrun( session: Session = NEW_SESSION, ): """ - Returns the dag run for a given execution date or run_id if it exists, otherwise none. + Return the dag run for a given execution date or run_id if it exists, otherwise none. :param execution_date: The execution date of the DagRun to find. :param run_id: The run_id of the DagRun to find. @@ -1469,7 +1469,7 @@ def get_dagrun( @provide_session def get_dagruns_between(self, start_date, end_date, session=NEW_SESSION): """ - Returns the list of dag runs between start_date (inclusive) and end_date (inclusive). + Return the list of dag runs between start_date (inclusive) and end_date (inclusive). :param start_date: The starting execution date of the DagRun to find. :param end_date: The ending execution date of the DagRun to find. @@ -1488,12 +1488,12 @@ def get_dagruns_between(self, start_date, end_date, session=NEW_SESSION): @provide_session def get_latest_execution_date(self, session: Session = NEW_SESSION) -> pendulum.DateTime | None: - """Returns the latest date for which at least one dag run exists.""" + """Return the latest date for which at least one dag run exists.""" return session.scalar(select(func.max(DagRun.execution_date)).where(DagRun.dag_id == self.dag_id)) @property def latest_execution_date(self): - """This attribute is deprecated. Please use `airflow.models.DAG.get_latest_execution_date`.""" + """Use `airflow.models.DAG.get_latest_execution_date`, this attribute is deprecated.""" warnings.warn( "This attribute is deprecated. 
Please use `airflow.models.DAG.get_latest_execution_date`.", RemovedInAirflow3Warning, @@ -1503,7 +1503,7 @@ def latest_execution_date(self): @property def subdags(self): - """Returns a list of the subdag objects associated to this DAG.""" + """Return a list of the subdag objects associated to this DAG.""" # Check SubDag for class but don't check class directly from airflow.operators.subdag import SubDagOperator @@ -2147,7 +2147,7 @@ def clear( exclude_task_ids: frozenset[str] | frozenset[tuple[str, int]] | None = frozenset(), ) -> int | Iterable[TaskInstance]: """ - Clears a set of task instances associated with the current dag for a specified date range. + Clear a set of task instances associated with the current dag for a specified date range. :param task_ids: List of task ids or (``task_id``, ``map_index``) tuples to clear :param start_date: The minimum execution_date to clear @@ -2312,7 +2312,7 @@ def __deepcopy__(self, memo): return result def sub_dag(self, *args, **kwargs): - """This method is deprecated in favor of partial_subset.""" + """Use `airflow.models.DAG.partial_subset`, this method is deprecated.""" warnings.warn( "This method is deprecated and will be removed in a future version. Please use partial_subset", RemovedInAirflow3Warning, @@ -2605,7 +2605,7 @@ def run( disable_retry=False, ): """ - Runs the DAG. + Run the DAG. :param start_date: the start date of the range to run :param end_date: the end date of the range to run @@ -2771,7 +2771,7 @@ def create_dagrun( data_interval: tuple[datetime, datetime] | None = None, ): """ - Creates a dag run from this dag including the tasks associated with this dag. + Create a dag run from this dag including the tasks associated with this dag. Returns the dag run. @@ -2877,7 +2877,7 @@ def bulk_sync_to_db( dags: Collection[DAG], session=NEW_SESSION, ): - """This method is deprecated in favor of bulk_write_to_db.""" + """Use `airflow.models.DAG.bulk_write_to_db`, this method is deprecated.""" warnings.warn( "This method is deprecated and will be removed in a future version. Please use bulk_write_to_db", RemovedInAirflow3Warning, @@ -3132,7 +3132,7 @@ def sync_to_db(self, processor_subdir: str | None = None, session=NEW_SESSION): self.bulk_write_to_db([self], processor_subdir=processor_subdir, session=session) def get_default_view(self): - """This is only there for backward compatible jinja2 templates.""" + """Allow backward compatible jinja2 templates.""" if self.default_view is None: return airflow_conf.get("webserver", "dag_default_view").lower() else: @@ -3181,7 +3181,7 @@ def deactivate_stale_dags(expiration_date, session=NEW_SESSION): @provide_session def get_num_task_instances(dag_id, run_id=None, task_ids=None, states=None, session=NEW_SESSION) -> int: """ - Returns the number of task instances in the given DAG. + Return the number of task instances in the given DAG. :param session: ORM session :param dag_id: ID of the DAG to get the task concurrency of @@ -3259,7 +3259,7 @@ def get_edge_info(self, upstream_task_id: str, downstream_task_id: str) -> EdgeI def set_edge_info(self, upstream_task_id: str, downstream_task_id: str, info: EdgeInfoType): """ - Sets the given edge information on the DAG. + Set the given edge information on the DAG. Note that this will overwrite, rather than merge with, existing info. """ @@ -3267,7 +3267,7 @@ def set_edge_info(self, upstream_task_id: str, downstream_task_id: str, info: Ed def validate_schedule_and_params(self): """ - Validates Param values when the schedule_interval is not None. 
+ Validate Param values when the schedule_interval is not None. Raise exception if there are any Params in the DAG which neither have a default value nor have the null in schema['type'] list, but the DAG have a schedule_interval which is not None. @@ -3284,7 +3284,7 @@ def validate_schedule_and_params(self): def iter_invalid_owner_links(self) -> Iterator[tuple[str, str]]: """ - Parses a given link, and verifies if it's a valid URL, or a 'mailto' link. + Parse a given link, and verifies if it's a valid URL, or a 'mailto' link. Returns an iterator of invalid (owner, link) pairs. """ diff --git a/airflow/models/dagbag.py b/airflow/models/dagbag.py index a8f5b4d6fc28..8608bcd13824 100644 --- a/airflow/models/dagbag.py +++ b/airflow/models/dagbag.py @@ -174,7 +174,7 @@ def dag_ids(self) -> list[str]: @provide_session def get_dag(self, dag_id, session: Session = None): """ - Gets the DAG out of the dictionary, and refreshes it if expired. + Get the DAG out of the dictionary, and refreshes it if expired. :param dag_id: DAG ID """ @@ -453,7 +453,7 @@ def _process_modules(self, filepath, mods, file_last_changed_on_disk): def bag_dag(self, dag, root_dag): """ - Adds the DAG into the bag, recurses into sub dags. + Add the DAG into the bag, recurses into sub dags. :raises: AirflowDagCycleException if a cycle is detected in this dag or its subdags. :raises: AirflowDagDuplicatedIdException if this dag or its subdags already exists in the bag. @@ -568,7 +568,7 @@ def collect_dags( self.dagbag_stats = sorted(stats, key=lambda x: x.duration, reverse=True) def collect_dags_from_db(self): - """Collects DAGs from database.""" + """Collect DAGs from database.""" from airflow.models.serialized_dag import SerializedDagModel with Stats.timer("collect_db_dags"): @@ -588,7 +588,7 @@ def collect_dags_from_db(self): self.dags.update(subdags) def dagbag_report(self): - """Prints a report around DagBag loading stats.""" + """Print a report around DagBag loading stats.""" stats = self.dagbag_stats dag_folder = self.dag_folder duration = sum((o.duration for o in stats), timedelta()).total_seconds() diff --git a/airflow/models/dagcode.py b/airflow/models/dagcode.py index 61b007bf6ce1..206c97e08f29 100644 --- a/airflow/models/dagcode.py +++ b/airflow/models/dagcode.py @@ -61,7 +61,7 @@ def __init__(self, full_filepath: str, source_code: str | None = None): @provide_session def sync_to_db(self, session: Session = NEW_SESSION) -> None: - """Writes code into database. + """Write code into database. :param session: ORM Session """ @@ -70,7 +70,7 @@ def sync_to_db(self, session: Session = NEW_SESSION) -> None: @classmethod @provide_session def bulk_sync_to_db(cls, filelocs: Iterable[str], session: Session = NEW_SESSION) -> None: - """Writes code in bulk into database. + """Write code in bulk into database. :param filelocs: file paths of DAGs to sync :param session: ORM Session @@ -126,7 +126,7 @@ def bulk_sync_to_db(cls, filelocs: Iterable[str], session: Session = NEW_SESSION @classmethod @provide_session def remove_deleted_code(cls, alive_dag_filelocs: Collection[str], session: Session = NEW_SESSION) -> None: - """Deletes code not included in alive_dag_filelocs. + """Delete code not included in alive_dag_filelocs. 
:param alive_dag_filelocs: file paths of alive DAGs :param session: ORM Session @@ -144,7 +144,7 @@ def remove_deleted_code(cls, alive_dag_filelocs: Collection[str], session: Sessi @classmethod @provide_session def has_dag(cls, fileloc: str, session: Session = NEW_SESSION) -> bool: - """Checks a file exist in dag_code table. + """Check a file exist in dag_code table. :param fileloc: the file to check :param session: ORM Session @@ -157,7 +157,7 @@ def has_dag(cls, fileloc: str, session: Session = NEW_SESSION) -> bool: @classmethod def get_code_by_fileloc(cls, fileloc: str) -> str: - """Returns source code for a given fileloc. + """Return source code for a given fileloc. :param fileloc: file path of a DAG :return: source code as string @@ -166,7 +166,7 @@ def get_code_by_fileloc(cls, fileloc: str) -> str: @classmethod def code(cls, fileloc) -> str: - """Returns source code for this DagCode object. + """Return source code for this DagCode object. :return: source code as string """ diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index a6e1da6a0ec1..923d6f3d8af1 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -91,7 +91,7 @@ class TISchedulingDecision(NamedTuple): def _creator_note(val): - """Custom creator for the ``note`` association proxy.""" + """Creator the ``note`` association proxy.""" if isinstance(val, str): return DagRunNote(content=val) elif isinstance(val, dict): @@ -281,7 +281,7 @@ def state(self): @provide_session def refresh_from_db(self, session: Session = NEW_SESSION) -> None: """ - Reloads the current dagrun from the database. + Reload the current dagrun from the database. :param session: database session """ @@ -380,7 +380,7 @@ def find( execution_end_date: datetime | None = None, ) -> list[DagRun]: """ - Returns a set of dag runs for the given search criteria. + Return a set of dag runs for the given search criteria. :param dag_id: the dag_id or list of dag_id to find dag runs for :param run_id: defines the run id for this dag run @@ -462,7 +462,7 @@ def get_task_instances( state: Iterable[TaskInstanceState | None] | None = None, session: Session = NEW_SESSION, ) -> list[TI]: - """Returns the task instances for this dag run.""" + """Return the task instances for this dag run.""" tis = ( select(TI) .options(joinedload(TI.dag_run)) @@ -499,7 +499,7 @@ def get_task_instance( map_index: int = -1, ) -> TI | None: """ - Returns the task instance specified by task_id for this dag run. + Return the task instance specified by task_id for this dag run. :param task_id: the task id :param session: Sqlalchemy ORM Session @@ -510,7 +510,7 @@ def get_task_instance( def get_dag(self) -> DAG: """ - Returns the Dag associated with this DagRun. + Return the Dag associated with this DagRun. 
:return: DAG """ @@ -523,7 +523,7 @@ def get_dag(self) -> DAG: def get_previous_dagrun( self, state: DagRunState | None = None, session: Session = NEW_SESSION ) -> DagRun | None: - """The previous DagRun, if there is one.""" + """Return the previous DagRun, if there is one.""" filters = [ DagRun.dag_id == self.dag_id, DagRun.execution_date < self.execution_date, @@ -534,7 +534,7 @@ def get_previous_dagrun( @provide_session def get_previous_scheduled_dagrun(self, session: Session = NEW_SESSION) -> DagRun | None: - """The previous, SCHEDULED DagRun, if there is one.""" + """Return the previous SCHEDULED DagRun, if there is one.""" return session.scalar( select(DagRun) .where( @@ -575,7 +575,7 @@ def update_state( self, session: Session = NEW_SESSION, execute_callbacks: bool = True ) -> tuple[list[TI], DagCallbackRequest | None]: """ - Determines the overall state of the DagRun based on the state of its TaskInstances. + Determine the overall state of the DagRun based on the state of its TaskInstances. :param session: Sqlalchemy ORM Session :param execute_callbacks: Should dag callbacks (success/failure, SLA etc.) be invoked @@ -973,7 +973,7 @@ def _emit_duration_stats_for_finished_state(self): @provide_session def verify_integrity(self, *, session: Session = NEW_SESSION) -> None: """ - Verifies the DagRun by checking for removed tasks or tasks that are not in the database yet. + Verify the DagRun by checking for removed tasks or tasks that are not in the database yet. It will set state to removed or add the task if required. @@ -1297,7 +1297,7 @@ def is_backfill(self) -> bool: @classmethod @provide_session def get_latest_runs(cls, session: Session = NEW_SESSION) -> list[DagRun]: - """Returns the latest DagRun for each DAG.""" + """Return the latest DagRun for each DAG.""" subquery = ( select(cls.dag_id, func.max(cls.execution_date).label("execution_date")) .group_by(cls.dag_id) diff --git a/airflow/models/mappedoperator.py b/airflow/models/mappedoperator.py index 82dcc82aa060..cbdc8ca05634 100644 --- a/airflow/models/mappedoperator.py +++ b/airflow/models/mappedoperator.py @@ -561,18 +561,18 @@ def doc_rst(self) -> str | None: return self.partial_kwargs.get("doc_rst") def get_dag(self) -> DAG | None: - """Implementing Operator.""" + """Implement Operator.""" return self.dag @property def output(self) -> XComArg: - """Returns reference to XCom pushed by current operator.""" + """Return reference to XCom pushed by current operator.""" from airflow.models.xcom_arg import XComArg return XComArg(operator=self) def serialize_for_task_group(self) -> tuple[DagAttributeTypes, Any]: - """Implementing DAGNode.""" + """Implement DAGNode.""" return DagAttributeTypes.OP, self.task_id def _expand_mapped_kwargs(self, context: Context, session: Session) -> tuple[Mapping[str, Any], set[int]]: diff --git a/airflow/models/param.py b/airflow/models/param.py index f4ae8067de50..bea4333cf510 100644 --- a/airflow/models/param.py +++ b/airflow/models/param.py @@ -100,7 +100,7 @@ def _warn_if_not_rfc3339_dt(value): def resolve(self, value: Any = NOTSET, suppress_exception: bool = False) -> Any: """ - Runs the validations and returns the Param's final value. + Run the validations and returns the Param's final value. May raise ValueError on failed validations, or TypeError if no value is passed and no value already exists. 
@@ -267,11 +267,11 @@ def update(self, *args, **kwargs) -> None: super().update(*args, **kwargs) def dump(self) -> dict[str, Any]: - """Dumps the ParamsDict object as a dictionary, while suppressing exceptions.""" + """Dump the ParamsDict object as a dictionary, while suppressing exceptions.""" return {k: v.resolve(suppress_exception=True) for k, v in self.items()} def validate(self) -> dict[str, Any]: - """Validates & returns all the Params object stored in the dictionary.""" + """Validate & returns all the Params object stored in the dictionary.""" resolved_dict = {} try: for k, v in self.items(): diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index 82d4c6e31c3c..62a56cf7734e 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -129,7 +129,7 @@ def write_dag( session: Session = NEW_SESSION, ) -> bool: """ - Serializes a DAG and writes it into database. + Serialize a DAG and writes it into database. If the record already exists, it checks if the Serialized DAG changed or not. If it is changed, it updates the record, ignores otherwise. @@ -174,7 +174,7 @@ def write_dag( @classmethod @provide_session def read_all_dags(cls, session: Session = NEW_SESSION) -> dict[str, SerializedDAG]: - """Reads all DAGs in serialized_dag table. + """Read all DAGs in serialized_dag table. :param session: ORM Session :returns: a dict of DAGs read from database @@ -224,7 +224,7 @@ def dag(self) -> SerializedDAG: @provide_session def remove_dag(cls, dag_id: str, session: Session = NEW_SESSION) -> None: """ - Deletes a DAG with given dag_id. + Delete a DAG with given dag_id. :param dag_id: dag_id to be deleted :param session: ORM Session. @@ -239,7 +239,7 @@ def remove_deleted_dags( processor_subdir: str | None = None, session: Session = NEW_SESSION, ) -> None: - """Deletes DAGs not included in alive_dag_filelocs. + """Delete DAGs not included in alive_dag_filelocs. :param alive_dag_filelocs: file paths of alive DAGs :param session: ORM Session @@ -266,7 +266,7 @@ def remove_deleted_dags( @classmethod @provide_session def has_dag(cls, dag_id: str, session: Session = NEW_SESSION) -> bool: - """Checks a DAG exist in serialized_dag table. + """Check a DAG exist in serialized_dag table. :param dag_id: the DAG to check :param session: ORM Session @@ -310,7 +310,7 @@ def bulk_sync_to_db( session: Session = NEW_SESSION, ) -> None: """ - Saves DAGs as Serialized DAG objects in the database. + Save DAGs as Serialized DAG objects in the database. Each DAG is saved in a separate database query. diff --git a/airflow/models/skipmixin.py b/airflow/models/skipmixin.py index 5ddc80308a38..c8feb58d91d4 100644 --- a/airflow/models/skipmixin.py +++ b/airflow/models/skipmixin.py @@ -64,7 +64,7 @@ def _set_state_to_skipped( tasks: Sequence[str] | Sequence[tuple[str, int]], session: Session, ) -> None: - """Used internally to set state of task instances to skipped from the same dag run.""" + """Set state of task instances to skipped from the same dag run.""" if tasks: now = timezone.utcnow() TI = TaskInstance @@ -96,7 +96,7 @@ def skip( map_index: int = -1, ): """ - Sets tasks instances to skipped from the same dag run. + Set tasks instances to skipped from the same dag run. 
If this instance has a `task_id` attribute, store the list of skipped task IDs to XCom so that NotPreviouslySkippedDep knows these tasks should be skipped when they @@ -162,7 +162,7 @@ def skip_all_except( branch_task_ids: None | str | Iterable[str], ): """ - This method implements the logic for a branching operator. + Implement the logic for a branching operator. Given a single task ID or list of task IDs to follow, this skips all other tasks immediately downstream of this operator. diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index 72105c6a9c5f..74cc5e45ffda 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -169,7 +169,7 @@ class TaskReturnCode(Enum): @contextlib.contextmanager def set_current_context(context: Context) -> Generator[Context, None, None]: """ - Sets the current execution context to the provided context object. + Set the current execution context to the provided context object. This method should be called once per Task execution, before calling operator.execute. """ @@ -222,7 +222,7 @@ def clear_task_instances( dag_run_state: DagRunState | Literal[False] = DagRunState.QUEUED, ) -> None: """ - Clears a set of task instances, but makes sure the running ones get killed. + Clear a set of task instances, but make sure the running ones get killed. Also sets Dagrun's `state` to QUEUED and `start_date` to the time of execution. But only for finished DRs (SUCCESS and FAILED). @@ -362,7 +362,7 @@ def _is_mappable_value(value: Any) -> TypeGuard[Collection]: def _creator_note(val): - """Custom creator for the ``note`` association proxy.""" + """Creator the ``note`` association proxy.""" if isinstance(val, str): return TaskInstanceNote(content=val) elif isinstance(val, dict): @@ -653,7 +653,7 @@ def command_as_list( cfg_path=None, ) -> list[str]: """ - Returns a command that can be executed anywhere where airflow is installed. + Return a command that can be executed anywhere where airflow is installed. This command is part of the message sent to executors by the orchestrator. """ @@ -719,7 +719,7 @@ def generate_command( map_index: int = -1, ) -> list[str]: """ - Generates the shell command required to execute this task instance. + Generate the shell command required to execute this task instance. :param dag_id: DAG ID :param task_id: Task ID @@ -822,7 +822,7 @@ def current_state(self, session: Session = NEW_SESSION) -> str: @provide_session def error(self, session: Session = NEW_SESSION) -> None: """ - Forces the task instance's state to FAILED in the database. + Force the task instance's state to FAILED in the database. :param session: SQLAlchemy ORM Session """ @@ -834,7 +834,7 @@ def error(self, session: Session = NEW_SESSION) -> None: @provide_session def refresh_from_db(self, session: Session = NEW_SESSION, lock_for_update: bool = False) -> None: """ - Refreshes the task instance from the database based on the primary key. + Refresh the task instance from the database based on the primary key. :param session: SQLAlchemy ORM Session :param lock_for_update: if True, indicates that the database should @@ -971,7 +971,7 @@ def is_premature(self) -> bool: @provide_session def are_dependents_done(self, session: Session = NEW_SESSION) -> bool: """ - Checks whether the immediate dependents of this task instance have succeeded or have been skipped. + Check whether the immediate dependents of this task instance have succeeded or have been skipped. This is meant to be used by wait_for_downstream. 
@@ -1001,7 +1001,7 @@ def get_previous_dagrun( state: DagRunState | None = None, session: Session | None = None, ) -> DagRun | None: - """The DagRun that ran before this task instance's DagRun. + """Return the DagRun that ran before this task instance's DagRun. :param state: If passed, it only take into account instances of a specific state. :param session: SQLAlchemy ORM Session. @@ -1035,7 +1035,7 @@ def get_previous_ti( session: Session = NEW_SESSION, ) -> TaskInstance | None: """ - The task instance for the task that ran before this task instance. + Return the task instance for the task that ran before this task instance. :param state: If passed, it only take into account instances of a specific state. :param session: SQLAlchemy ORM Session @@ -1086,7 +1086,7 @@ def get_previous_execution_date( session: Session = NEW_SESSION, ) -> pendulum.DateTime | None: """ - The execution date from property previous_ti_success. + Return the execution date from property previous_ti_success. :param state: If passed, it only take into account instances of a specific state. :param session: SQLAlchemy ORM Session @@ -1100,7 +1100,7 @@ def get_previous_start_date( self, state: DagRunState | None = None, session: Session = NEW_SESSION ) -> pendulum.DateTime | None: """ - The start date from property previous_ti_success. + Return the start date from property previous_ti_success. :param state: If passed, it only take into account instances of a specific state. :param session: SQLAlchemy ORM Session @@ -1225,13 +1225,13 @@ def next_retry_datetime(self): return self.end_date + delay def ready_for_retry(self) -> bool: - """Checks on whether the task instance is in the right state and timeframe to be retried.""" + """Check on whether the task instance is in the right state and timeframe to be retried.""" return self.state == TaskInstanceState.UP_FOR_RETRY and self.next_retry_datetime() < timezone.utcnow() @provide_session def get_dagrun(self, session: Session = NEW_SESSION) -> DagRun: """ - Returns the DagRun for this TaskInstance. + Return the DagRun for this TaskInstance. :param session: SQLAlchemy ORM Session :return: DagRun @@ -1269,7 +1269,7 @@ def check_and_change_state_before_execution( session: Session = NEW_SESSION, ) -> bool: """ - Checks dependencies and then sets state to RUNNING if they are met. + Check dependencies and then sets state to RUNNING if they are met. Returns True if and only if state is set to RUNNING, which implies that task should be executed, in preparation for _run_raw_task. @@ -1407,7 +1407,7 @@ def _log_state(self, lead_msg: str = "") -> None: def emit_state_change_metric(self, new_state: TaskInstanceState) -> None: """ - Sends a time metric representing how much time a given state transition took. + Send a time metric representing how much time a given state transition took. The previous state and metric name is deduced from the state the task was put in. 
@@ -1705,7 +1705,7 @@ def _run_finished_callback( ) def _execute_task(self, context, task_orig): - """Executes Task (optionally with a Timeout) and pushes Xcom results.""" + """Execute Task (optionally with a Timeout) and push Xcom results.""" task_to_execute = self.task # If the task has been deferred and is being executed due to a trigger, # then we need to pick the right method to come back to, otherwise @@ -1760,7 +1760,7 @@ def _execute_task(self, context, task_orig): @provide_session def _defer_task(self, session: Session, defer: TaskDeferred) -> None: - """Marks the task as deferred and sets up the trigger that is needed to resume it.""" + """Mark the task as deferred and sets up the trigger that is needed to resume it.""" from airflow.models.trigger import Trigger # First, make the trigger entry @@ -1905,7 +1905,7 @@ def _handle_reschedule( @staticmethod def get_truncated_error_traceback(error: BaseException, truncate_to: Callable) -> TracebackType | None: """ - Truncates the traceback of an exception to the first frame called from within a given function. + Truncate the traceback of an exception to the first frame called from within a given function. :param error: exception to get traceback from :param truncate_to: Function to truncate TB to. Must have a ``__code__`` attribute @@ -2605,13 +2605,13 @@ def get_num_running_task_instances(self, session: Session, same_dagrun=False) -> return num_running_task_instances_query.scalar() def init_run_context(self, raw: bool = False) -> None: - """Sets the log context.""" + """Set the log context.""" self.raw = raw self._set_context(self) @staticmethod def filter_for_tis(tis: Iterable[TaskInstance | TaskInstanceKey]) -> BooleanClauseList | None: - """Returns SQLAlchemy filter to query selected task instances.""" + """Return SQLAlchemy filter to query selected task instances.""" # DictKeys type, (what we often pass here from the scheduler) is not directly indexable :( # Or it might be a generator, but we need to be able to iterate over it more than once tis = list(tis) @@ -2726,7 +2726,7 @@ def ti_selector_condition(cls, vals: Collection[str | tuple[str, int]]) -> Colum @provide_session def schedule_downstream_tasks(self, session: Session = NEW_SESSION, max_tis_per_query: int | None = None): """ - The mini-scheduler for scheduling downstream tasks of this task instance. + Schedule downstream tasks of this task instance. 
:meta: private """ diff --git a/airflow/models/taskinstancekey.py b/airflow/models/taskinstancekey.py index 47ed1d4d5422..50906e47b0a3 100644 --- a/airflow/models/taskinstancekey.py +++ b/airflow/models/taskinstancekey.py @@ -42,7 +42,7 @@ def reduced(self) -> TaskInstanceKey: ) def with_try_number(self, try_number: int) -> TaskInstanceKey: - """Returns TaskInstanceKey with provided ``try_number``.""" + """Return TaskInstanceKey with provided ``try_number``.""" return TaskInstanceKey(self.dag_id, self.task_id, self.run_id, try_number, self.map_index) @property diff --git a/airflow/models/taskmixin.py b/airflow/models/taskmixin.py index c5e7cea3b92e..98c29cf3a37e 100644 --- a/airflow/models/taskmixin.py +++ b/airflow/models/taskmixin.py @@ -94,22 +94,22 @@ def update_relative( """ def __lshift__(self, other: DependencyMixin | Sequence[DependencyMixin]): - """Implements Task << Task.""" + """Implement Task << Task.""" self.set_upstream(other) return other def __rshift__(self, other: DependencyMixin | Sequence[DependencyMixin]): - """Implements Task >> Task.""" + """Implement Task >> Task.""" self.set_downstream(other) return other def __rrshift__(self, other: DependencyMixin | Sequence[DependencyMixin]): - """Called for Task >> [Task] because list don't have __rshift__ operators.""" + """Implement Task >> [Task] because list don't have __rshift__ operators.""" self.__lshift__(other) return self def __rlshift__(self, other: DependencyMixin | Sequence[DependencyMixin]): - """Called for Task << [Task] because list don't have __lshift__ operators.""" + """Implement Task << [Task] because list don't have __lshift__ operators.""" self.__rshift__(other) return self @@ -201,7 +201,7 @@ def _set_relatives( upstream: bool = False, edge_modifier: EdgeModifier | None = None, ) -> None: - """Sets relatives for the task or task list.""" + """Set relatives for the task or task list.""" from airflow.models.baseoperator import BaseOperator from airflow.models.mappedoperator import MappedOperator @@ -297,5 +297,5 @@ def get_direct_relatives(self, upstream: bool = False) -> Iterable[DAGNode]: return self.downstream_list def serialize_for_task_group(self) -> tuple[DagAttributeTypes, Any]: - """This is used by TaskGroupSerialization to serialize a task group's content.""" + """Serialize a task group's content; used by TaskGroupSerialization.""" raise NotImplementedError() diff --git a/airflow/models/taskreschedule.py b/airflow/models/taskreschedule.py index 7b642f0a266b..f4ac7408f415 100644 --- a/airflow/models/taskreschedule.py +++ b/airflow/models/taskreschedule.py @@ -103,7 +103,7 @@ def query_for_task_instance( try_number: int | None = None, ) -> Query: """ - Returns query for task reschedules for a given the task instance. + Return query for task reschedules for a given the task instance. :param session: the database session object :param task_instance: the task instance to find task reschedules for @@ -135,7 +135,7 @@ def find_for_task_instance( try_number: int | None = None, ) -> list[TaskReschedule]: """ - Returns all task reschedules for the task instance and try number, in ascending order. + Return all task reschedules for the task instance and try number, in ascending order. 
:param session: the database session object :param task_instance: the task instance to find task reschedules for diff --git a/airflow/models/trigger.py b/airflow/models/trigger.py index 0161d5c9fe56..6ba1d45852b0 100644 --- a/airflow/models/trigger.py +++ b/airflow/models/trigger.py @@ -92,7 +92,7 @@ def from_object(cls, trigger: BaseTrigger) -> Trigger: @internal_api_call @provide_session def bulk_fetch(cls, ids: Iterable[int], session: Session = NEW_SESSION) -> dict[int, Trigger]: - """Fetches all the Triggers by ID and returns a dict mapping ID -> Trigger instance.""" + """Fetch all the Triggers by ID and return a dict mapping ID -> Trigger instance.""" query = session.scalars( select(cls) .where(cls.id.in_(ids)) @@ -108,7 +108,7 @@ def bulk_fetch(cls, ids: Iterable[int], session: Session = NEW_SESSION) -> dict[ @internal_api_call @provide_session def clean_unused(cls, session: Session = NEW_SESSION) -> None: - """Deletes all triggers that have no tasks dependent on them. + """Delete all triggers that have no tasks dependent on them. Triggers have a one-to-many relationship to task instances, so we need to clean those up first. Afterwards we can drop the triggers not @@ -141,7 +141,7 @@ def clean_unused(cls, session: Session = NEW_SESSION) -> None: @internal_api_call @provide_session def submit_event(cls, trigger_id, event, session: Session = NEW_SESSION) -> None: - """Takes an event from an instance of itself, and triggers all dependent tasks to resume.""" + """Take an event from an instance of itself, and trigger all dependent tasks to resume.""" for task_instance in session.scalars( select(TaskInstance).where( TaskInstance.trigger_id == trigger_id, TaskInstance.state == TaskInstanceState.DEFERRED @@ -193,7 +193,7 @@ def submit_failure(cls, trigger_id, exc=None, session: Session = NEW_SESSION) -> @internal_api_call @provide_session def ids_for_triggerer(cls, triggerer_id, session: Session = NEW_SESSION) -> list[int]: - """Retrieves a list of triggerer_ids.""" + """Retrieve a list of triggerer_ids.""" return session.scalars(select(cls.id).where(cls.triggerer_id == triggerer_id)).all() @classmethod diff --git a/airflow/models/variable.py b/airflow/models/variable.py index a434a29e2b57..5ca7774cbf06 100644 --- a/airflow/models/variable.py +++ b/airflow/models/variable.py @@ -126,7 +126,7 @@ def get( default_var: Any = __NO_DEFAULT_SENTINEL, deserialize_json: bool = False, ) -> Any: - """Gets a value for an Airflow Variable Key. + """Get a value for an Airflow Variable Key. :param key: Variable Key :param default_var: Default value of the Variable if the Variable doesn't exist @@ -157,7 +157,7 @@ def set( serialize_json: bool = False, session: Session = None, ) -> None: - """Sets a value for an Airflow Variable with a given Key. + """Set a value for an Airflow Variable with a given Key. This operation overwrites an existing variable. @@ -190,7 +190,7 @@ def update( serialize_json: bool = False, session: Session = None, ) -> None: - """Updates a given Airflow Variable with the Provided value. + """Update a given Airflow Variable with the Provided value. :param key: Variable Key :param value: Value to set for the Variable @@ -227,7 +227,7 @@ def rotate_fernet_key(self): @staticmethod def check_for_write_conflict(key: str) -> None: - """Logs a warning if a variable exists outside of the metastore. + """Log a warning if a variable exists outside the metastore. 
If we try to write a variable to the metastore while the same key exists in an environment variable or custom secrets backend, then diff --git a/airflow/models/xcom.py b/airflow/models/xcom.py index 50e62ea29ff6..637da3504df6 100644 --- a/airflow/models/xcom.py +++ b/airflow/models/xcom.py @@ -122,7 +122,7 @@ class BaseXCom(Base, LoggingMixin): @reconstructor def init_on_load(self): """ - Called by the ORM after the instance has been loaded from the DB or otherwise reconstituted. + Execute after the instance has been loaded from the DB or otherwise reconstituted; called by the ORM. i.e automatically deserialize Xcom value when loading from DB. """ @@ -838,7 +838,7 @@ def _shim(**kwargs): def _get_function_params(function) -> list[str]: """ - Returns the list of variables names of a function. + Return the list of variables names of a function. :param function: The function to inspect """ @@ -850,10 +850,10 @@ def _get_function_params(function) -> list[str]: def resolve_xcom_backend() -> type[BaseXCom]: - """Resolves custom XCom class. + """Resolve custom XCom class. - Confirms that custom XCom class extends the BaseXCom. - Compares the function signature of the custom XCom serialize_value to the base XCom serialize_value. + Confirm that custom XCom class extends the BaseXCom. + Compare the function signature of the custom XCom serialize_value to the base XCom serialize_value. """ clazz = conf.getimport("core", "xcom_backend", fallback=f"airflow.models.xcom.{BaseXCom.__name__}") if not clazz: diff --git a/airflow/models/xcom_arg.py b/airflow/models/xcom_arg.py index 0a54d18ff7b9..a19aa6703f3a 100644 --- a/airflow/models/xcom_arg.py +++ b/airflow/models/xcom_arg.py @@ -85,11 +85,11 @@ class XComArg(ResolveMixin, DependencyMixin): @overload def __new__(cls: type[XComArg], operator: Operator, key: str = XCOM_RETURN_KEY) -> XComArg: - """Called when the user writes ``XComArg(...)`` directly.""" + """Execute when the user writes ``XComArg(...)`` directly.""" @overload def __new__(cls: type[XComArg]) -> XComArg: - """Called by Python internals from subclasses.""" + """Execute by Python internals from subclasses.""" def __new__(cls, *args, **kwargs) -> XComArg: if cls is XComArg: @@ -155,7 +155,8 @@ def set_downstream( operator.set_downstream(task_or_task_list, edge_modifier) def _serialize(self) -> dict[str, Any]: - """Called by DAG serialization. + """ + Serialize a DAG. The implementation should be the inverse function to ``deserialize``, returning a data dict converted from this XComArg derivative. DAG @@ -167,7 +168,8 @@ def _serialize(self) -> dict[str, Any]: @classmethod def _deserialize(cls, data: dict[str, Any], dag: DAG) -> XComArg: - """Called when deserializing a DAG. + """ + Deserialize a DAG. 
The implementation should be the inverse function to ``serialize``, implementing given a data dict converted from this XComArg derivative, @@ -246,7 +248,7 @@ def __eq__(self, other: Any) -> bool: return self.operator == other.operator and self.key == other.key def __getitem__(self, item: str) -> XComArg: - """Implements xcomresult['some_result_key'].""" + """Implement xcomresult['some_result_key'].""" if not isinstance(item, str): raise ValueError(f"XComArg only supports str lookup, received {type(item).__name__}") return PlainXComArg(operator=self.operator, key=item) diff --git a/airflow/notifications/basenotifier.py b/airflow/notifications/basenotifier.py index 7ef0603be14e..81552404e151 100644 --- a/airflow/notifications/basenotifier.py +++ b/airflow/notifications/basenotifier.py @@ -73,7 +73,7 @@ def render_template_fields( @abstractmethod def notify(self, context: Context) -> None: """ - Sends a notification. + Send a notification. :param context: The airflow context """ diff --git a/airflow/operators/bash.py b/airflow/operators/bash.py index 6ec1b0e80d6f..9a188c2cc565 100644 --- a/airflow/operators/bash.py +++ b/airflow/operators/bash.py @@ -172,7 +172,7 @@ def subprocess_hook(self): return SubprocessHook() def get_env(self, context): - """Builds the set of environment variables to be exposed for the bash command.""" + """Build the set of environment variables to be exposed for the bash command.""" system_env = os.environ.copy() env = self.env if env is None: diff --git a/airflow/operators/datetime.py b/airflow/operators/datetime.py index 7c3c648130c1..e6f47912ad9a 100644 --- a/airflow/operators/datetime.py +++ b/airflow/operators/datetime.py @@ -99,7 +99,7 @@ def target_times_as_dates( lower: datetime.datetime | datetime.time | None, upper: datetime.datetime | datetime.time | None, ): - """Ensures upper and lower time targets are datetimes by combining them with base_date.""" + """Ensure upper and lower time targets are datetimes by combining them with base_date.""" if isinstance(lower, datetime.datetime) and isinstance(upper, datetime.datetime): return lower, upper diff --git a/airflow/operators/python.py b/airflow/operators/python.py index b3627a48ba19..e12e9bc4e9e1 100644 --- a/airflow/operators/python.py +++ b/airflow/operators/python.py @@ -57,7 +57,7 @@ def is_venv_installed() -> bool: """ - Checks if the virtualenv package is installed via checking if it is on the path or installed as package. + Check if the virtualenv package is installed via checking if it is on the path or installed as package. :return: True if it is. Whichever way of checking it works, is fine. """ @@ -67,7 +67,7 @@ def is_venv_installed() -> bool: def task(python_callable: Callable | None = None, multiple_outputs: bool | None = None, **kwargs): - """Deprecated. Use :func:`airflow.decorators.task` instead. + """Use :func:`airflow.decorators.task` instead, this is deprecated. Calls ``@task.python`` and allows users to turn a Python function into an Airflow task. @@ -202,7 +202,7 @@ def determine_kwargs(self, context: Mapping[str, Any]) -> Mapping[str, Any]: def execute_callable(self) -> Any: """ - Calls the python callable with the given arguments. + Call the python callable with the given arguments. :return: the return value of the call. 
""" diff --git a/docs/apache-airflow/img/airflow_erd.sha256 b/docs/apache-airflow/img/airflow_erd.sha256 index 0594dd21e83b..2a70c2746a49 100644 --- a/docs/apache-airflow/img/airflow_erd.sha256 +++ b/docs/apache-airflow/img/airflow_erd.sha256 @@ -1 +1 @@ -266a8533dca6d9c7e984341bfe29e99f29217e44529455a49e7c4a5faccdcdf3 \ No newline at end of file +52cd8c81c9f586992515a903d882e72e8df0a55da5021d2614ba1807b8b63f7e \ No newline at end of file diff --git a/docs/apache-airflow/img/airflow_erd.svg b/docs/apache-airflow/img/airflow_erd.svg index 8bf922245e25..ba548c60fddd 100644 --- a/docs/apache-airflow/img/airflow_erd.svg +++ b/docs/apache-airflow/img/airflow_erd.svg @@ -1228,28 +1228,28 @@ task_instance--xcom -1 +0..N 1 task_instance--xcom -0..N +1 1 task_instance--xcom -1 +0..N 1 task_instance--xcom -0..N +1 1 From 5988ea6b0cf57d1a25d4a7b8ea3519d6607bac53 Mon Sep 17 00:00:00 2001 From: "D. Ferruzzi" Date: Sat, 12 Aug 2023 11:42:35 -0700 Subject: [PATCH 042/117] D401 Support - Utils (#33339) (cherry picked from commit 827962878e6fb39e014639d83cff7d0881595ecb) --- airflow/utils/cli.py | 26 +++++++++---------- airflow/utils/cli_action_loggers.py | 11 ++++---- airflow/utils/cli_app_builder.py | 2 +- airflow/utils/code_utils.py | 4 +-- airflow/utils/configuration.py | 2 +- airflow/utils/dag_cycle_tester.py | 2 +- airflow/utils/dates.py | 2 +- airflow/utils/db.py | 13 +++++----- airflow/utils/decorators.py | 4 +-- airflow/utils/dot_renderer.py | 6 ++--- airflow/utils/edgemodifier.py | 6 ++--- airflow/utils/email.py | 4 +-- airflow/utils/empty_set.py | 2 +- airflow/utils/event_scheduler.py | 2 +- airflow/utils/file.py | 8 +++--- airflow/utils/helpers.py | 20 +++++++------- airflow/utils/json.py | 2 +- airflow/utils/log/file_task_handler.py | 4 +-- airflow/utils/log/log_reader.py | 6 ++--- airflow/utils/log/logging_mixin.py | 8 +++--- airflow/utils/log/secrets_masker.py | 8 ++++-- airflow/utils/operator_helpers.py | 2 +- airflow/utils/platform.py | 2 +- airflow/utils/process_utils.py | 2 +- .../utils/providers_configuration_loader.py | 3 +-- airflow/utils/python_virtualenv.py | 5 ++-- airflow/utils/retries.py | 3 ++- airflow/utils/serve_logs.py | 2 +- airflow/utils/session.py | 2 +- airflow/utils/setup_teardown.py | 17 +++++++----- airflow/utils/sqlalchemy.py | 10 ++++--- airflow/utils/state.py | 2 +- airflow/utils/task_group.py | 18 +++++++------ airflow/utils/timeout.py | 4 +-- airflow/utils/timezone.py | 6 ++--- airflow/utils/trigger_rule.py | 4 +-- airflow/utils/types.py | 2 +- airflow/utils/weekday.py | 2 +- airflow/utils/weight_rule.py | 2 +- 39 files changed, 122 insertions(+), 108 deletions(-) diff --git a/airflow/utils/cli.py b/airflow/utils/cli.py index b601c4c07e2c..0682e6531335 100644 --- a/airflow/utils/cli.py +++ b/airflow/utils/cli.py @@ -62,7 +62,7 @@ def _check_cli_args(args): def action_cli(func=None, check_db=True): def action_logging(f: T) -> T: """ - Decorates function to execute function at the same time submitting action_logging but in CLI context. + Decorate function to execute function at the same time submitting action_logging but in CLI context. It will call action logger callbacks twice, one for pre-execution and the other one for post-execution. @@ -86,7 +86,7 @@ def action_logging(f: T) -> T: @functools.wraps(f) def wrapper(*args, **kwargs): """ - A wrapper for cli functions; assumes Namespace instance as first positional argument. + Wrap cli functions; assume Namespace instance as first positional argument. :param args: Positional argument. 
It assumes to have Namespace instance at 1st positional argument @@ -127,7 +127,7 @@ def wrapper(*args, **kwargs): def _build_metrics(func_name, namespace): """ - Builds metrics dict from function args. + Build metrics dict from function args. It assumes that function arguments is from airflow.bin.cli module's function and has Namespace instance where it optionally contains "dag_id", "task_id", @@ -173,7 +173,7 @@ def _build_metrics(func_name, namespace): def process_subdir(subdir: str | None): - """Expands path to absolute by replacing 'DAGS_FOLDER', '~', '.', etc.""" + """Expand path to absolute by replacing 'DAGS_FOLDER', '~', '.', etc.""" if subdir: if not settings.DAGS_FOLDER: raise ValueError("DAGS_FOLDER variable in settings should be filled.") @@ -183,7 +183,7 @@ def process_subdir(subdir: str | None): def get_dag_by_file_location(dag_id: str): - """Returns DAG of a given dag_id by looking up file location.""" + """Return DAG of a given dag_id by looking up file location.""" from airflow.models import DagBag, DagModel # Benefit is that logging from other dags in dagbag will not appear @@ -219,7 +219,7 @@ def _search_for_dag_file(val: str | None) -> str | None: def get_dag(subdir: str | None, dag_id: str, from_db: bool = False) -> DAG: """ - Returns DAG of a given dag_id. + Return DAG of a given dag_id. First we'll try to use the given subdir. If that doesn't work, we'll try to find the correct path (assuming it's a file) and failing that, use the configured @@ -248,7 +248,7 @@ def get_dag(subdir: str | None, dag_id: str, from_db: bool = False) -> DAG: def get_dags(subdir: str | None, dag_id: str, use_regex: bool = False): - """Returns DAG(s) matching a given regex or dag_id.""" + """Return DAG(s) matching a given regex or dag_id.""" from airflow.models import DagBag if not use_regex: @@ -276,7 +276,7 @@ def get_dag_by_pickle(pickle_id: int, session: Session = NEW_SESSION) -> DAG: def setup_locations(process, pid=None, stdout=None, stderr=None, log=None): - """Creates logging paths.""" + """Create logging paths.""" if not stderr: stderr = os.path.join(settings.AIRFLOW_HOME, f"airflow-{process}.err") if not stdout: @@ -293,7 +293,7 @@ def setup_locations(process, pid=None, stdout=None, stderr=None, log=None): def setup_logging(filename): - """Creates log file handler for daemon process.""" + """Create log file handler for daemon process.""" root = logging.getLogger() handler = NonCachingFileHandler(filename) formatter = logging.Formatter(settings.SIMPLE_LOG_FORMAT) @@ -306,7 +306,7 @@ def setup_logging(filename): def sigint_handler(sig, frame): """ - Returns without error on SIGINT or SIGTERM signals in interactive command mode. + Return without error on SIGINT or SIGTERM signals in interactive command mode. e.g. CTRL+C or kill """ @@ -315,7 +315,7 @@ def sigint_handler(sig, frame): def sigquit_handler(sig, frame): """ - Helps debug deadlocks by printing stacktraces when this gets a SIGQUIT. + Help debug deadlocks by printing stacktraces when this gets a SIGQUIT. e.g. 
kill -s QUIT or CTRL+ """ @@ -340,7 +340,7 @@ class ColorMode: def should_use_colors(args) -> bool: - """Processes arguments and decides whether to enable color in output.""" + """Process arguments and decide whether to enable color in output.""" if args.color == ColorMode.ON: return True if args.color == ColorMode.OFF: @@ -361,7 +361,7 @@ def should_ignore_depends_on_past(args) -> bool: def suppress_logs_and_warning(f: T) -> T: - """Decorator to suppress logging and warning messages in cli functions.""" + """Suppress logging and warning messages in cli functions.""" @functools.wraps(f) def _wrapper(*args, **kwargs): diff --git a/airflow/utils/cli_action_loggers.py b/airflow/utils/cli_action_loggers.py index 575f4bfb08ca..0f5f346db3e2 100644 --- a/airflow/utils/cli_action_loggers.py +++ b/airflow/utils/cli_action_loggers.py @@ -29,7 +29,7 @@ def register_pre_exec_callback(action_logger): - """Registers more action_logger function callback for pre-execution. + """Register more action_logger function callback for pre-execution. This function callback is expected to be called with keyword args. For more about the arguments that is being passed to the callback, @@ -43,7 +43,7 @@ def register_pre_exec_callback(action_logger): def register_post_exec_callback(action_logger): - """Registers more action_logger function callback for post-execution. + """Register more action_logger function callback for post-execution. This function callback is expected to be called with keyword args. For more about the arguments that is being passed to the callback, @@ -57,7 +57,7 @@ def register_post_exec_callback(action_logger): def on_pre_execution(**kwargs): - """Calls callbacks before execution. + """Call callbacks before execution. Note that any exception from callback will be logged but won't be propagated. @@ -73,7 +73,7 @@ def on_pre_execution(**kwargs): def on_post_execution(**kwargs): - """Calls callbacks after execution. + """Call callbacks after execution. As it's being called after execution, it can capture status of execution, duration, etc. Note that any exception from callback will be logged but @@ -91,7 +91,8 @@ def on_post_execution(**kwargs): def default_action_log(sub_command, user, task_id, dag_id, execution_date, host_name, full_command, **_): - """Default action logger callback that behaves similar to ``action_logging``. + """ + Behave similar to ``action_logging``; default action logger callback. The difference is this function uses the global ORM session, and pushes a ``Log`` row into the database instead of actually logging. 
diff --git a/airflow/utils/cli_app_builder.py b/airflow/utils/cli_app_builder.py index e8cf74e6ec6e..dd3a40ffd8c7 100644 --- a/airflow/utils/cli_app_builder.py +++ b/airflow/utils/cli_app_builder.py @@ -31,7 +31,7 @@ @lru_cache(maxsize=None) def _return_appbuilder(app: Flask) -> AirflowAppBuilder: - """Returns an appbuilder instance for the given app.""" + """Return an appbuilder instance for the given app.""" init_appbuilder(app) init_plugins(app) return app.appbuilder # type: ignore[attr-defined] diff --git a/airflow/utils/code_utils.py b/airflow/utils/code_utils.py index 65172a0ebe17..f43a6ce41d05 100644 --- a/airflow/utils/code_utils.py +++ b/airflow/utils/code_utils.py @@ -23,7 +23,7 @@ def get_python_source(x: Any) -> str | None: - """Helper function to get Python source (or not), preventing exceptions.""" + """Get Python source (or not), preventing exceptions.""" if isinstance(x, str): return x @@ -80,7 +80,7 @@ def prepare_code_snippet(file_path: str, line_no: int, context_lines_count: int def get_terminal_formatter(**opts): - """Returns the best formatter available in the current terminal.""" + """Return the best formatter available in the current terminal.""" if "256" in os.environ.get("TERM", ""): from pygments.formatters.terminal256 import Terminal256Formatter diff --git a/airflow/utils/configuration.py b/airflow/utils/configuration.py index 84c5e946ee1a..15c8beb4d1b7 100644 --- a/airflow/utils/configuration.py +++ b/airflow/utils/configuration.py @@ -27,7 +27,7 @@ def tmp_configuration_copy(chmod=0o600, include_env=True, include_cmds=True): """ - Returns a path for a temporary file including a full copy of the configuration settings. + Return a path for a temporary file including a full copy of the configuration settings. :param include_env: Should the value of configuration from ``AIRFLOW__`` environment variables be included or not diff --git a/airflow/utils/dag_cycle_tester.py b/airflow/utils/dag_cycle_tester.py index 4cea52200da1..3fb47371dc51 100644 --- a/airflow/utils/dag_cycle_tester.py +++ b/airflow/utils/dag_cycle_tester.py @@ -58,7 +58,7 @@ def check_cycle(dag: DAG) -> None: task_dict = dag.task_dict def _check_adjacent_tasks(task_id, current_task): - """Returns first untraversed child task, else None if all tasks traversed.""" + """Return first untraversed child task, else None if all tasks traversed.""" for adjacent_task in current_task.get_direct_relative_ids(): if visited[adjacent_task] == CYCLE_IN_PROGRESS: msg = f"Cycle detected in DAG: {dag.dag_id}. Faulty task: {task_id}" diff --git a/airflow/utils/dates.py b/airflow/utils/dates.py index a4f70c8dfec0..4f9d37d0c901 100644 --- a/airflow/utils/dates.py +++ b/airflow/utils/dates.py @@ -140,7 +140,7 @@ def round_time( delta: str | timedelta | relativedelta, start_date: datetime = timezone.make_aware(datetime.min), ): - """Returns ``start_date + i * delta`` for given ``i`` where the result is closest to ``dt``. + """Return ``start_date + i * delta`` for given ``i`` where the result is closest to ``dt``. .. code-block:: pycon diff --git a/airflow/utils/db.py b/airflow/utils/db.py index db49e37dca5a..a4e2ec87fb98 100644 --- a/airflow/utils/db.py +++ b/airflow/utils/db.py @@ -792,7 +792,7 @@ def _get_current_revision(session): def check_migrations(timeout): """ - Function to wait for all airflow migrations to complete. + Wait for all airflow migrations to complete. 
:param timeout: Timeout for the migration in seconds :return: None @@ -1382,11 +1382,12 @@ def _move_duplicate_data_to_new_table( def check_bad_references(session: Session) -> Iterable[str]: """ - Starting in Airflow 2.2, we began a process of replacing `execution_date` with `run_id` in many tables. + Go through each table and look for records that can't be mapped to a dag run. - Here we go through each table and look for records that can't be mapped to a dag run. When we find such "dangling" rows we back them up in a special table and delete them from the main table. + + Starting in Airflow 2.2, we began a process of replacing `execution_date` with `run_id` in many tables. """ from airflow.models.dagrun import DagRun from airflow.models.renderedtifields import RenderedTaskInstanceFields @@ -1536,7 +1537,7 @@ def _revision_greater(config, this_rev, base_rev): def _revisions_above_min_for_offline(config, revisions) -> None: """ - Checks that all supplied revision ids are above the minimum revision for the dialect. + Check that all supplied revision ids are above the minimum revision for the dialect. :param config: Alembic config :param revisions: list of Alembic revision ids @@ -1727,7 +1728,7 @@ def downgrade(*, to_revision, from_revision=None, show_sql_only=False, session: def drop_airflow_models(connection): """ - Drops all airflow models. + Drop all airflow models. :param connection: SQLAlchemy Connection :return: None @@ -1762,7 +1763,7 @@ def drop_airflow_moved_tables(connection): @provide_session def check(session: Session = NEW_SESSION): """ - Checks if the database works. + Check if the database works. :param session: session of the sqlalchemy """ diff --git a/airflow/utils/decorators.py b/airflow/utils/decorators.py index eaa624f6cfb6..21b6ff341248 100644 --- a/airflow/utils/decorators.py +++ b/airflow/utils/decorators.py @@ -30,7 +30,7 @@ def apply_defaults(func: T) -> T: """ - This decorator is deprecated. + Use apply_default decorator for the `default_args` feature to work properly; deprecated. In previous versions, all subclasses of BaseOperator must use apply_default decorator for the" `default_args` feature to work properly. @@ -58,7 +58,7 @@ def wrapper(*args, **kwargs): def remove_task_decorator(python_source: str, task_decorator_name: str) -> str: """ - Removes @task or similar decorators as well as @setup and @teardown. + Remove @task or similar decorators as well as @setup and @teardown. :param python_source: The python source code :param task_decorator_name: the decorator name diff --git a/airflow/utils/dot_renderer.py b/airflow/utils/dot_renderer.py index f168566f7e33..7b895f739f97 100644 --- a/airflow/utils/dot_renderer.py +++ b/airflow/utils/dot_renderer.py @@ -37,7 +37,7 @@ def _refine_color(color: str): """ - Converts color in #RGB (12 bits) format to #RRGGBB (32 bits), if it possible. + Convert color in #RGB (12 bits) format to #RRGGBB (32 bits), if it possible. Otherwise, it returns the original value. Graphviz does not support colors in #RGB format. @@ -144,7 +144,7 @@ def _draw_nodes( def render_dag_dependencies(deps: dict[str, list[DagDependency]]) -> graphviz.Digraph: """ - Renders the DAG dependency to the DOT object. + Render the DAG dependency to the DOT object. :param deps: List of DAG dependencies :return: Graphviz object @@ -169,7 +169,7 @@ def render_dag_dependencies(deps: dict[str, list[DagDependency]]) -> graphviz.Di def render_dag(dag: DAG, tis: list[TaskInstance] | None = None) -> graphviz.Digraph: """ - Renders the DAG object to the DOT object. 
+ Render the DAG object to the DOT object. If an task instance list is passed, the nodes will be painted according to task statuses. diff --git a/airflow/utils/edgemodifier.py b/airflow/utils/edgemodifier.py index e760beb50a45..a78e6c649992 100644 --- a/airflow/utils/edgemodifier.py +++ b/airflow/utils/edgemodifier.py @@ -154,7 +154,7 @@ def set_downstream( def update_relative( self, other: DependencyMixin, upstream: bool = True, edge_modifier: EdgeModifier | None = None ) -> None: - """Called if we're not the "main" side of a relationship; we still run the same logic, though.""" + """Update relative if we're not the "main" side of a relationship; still run the same logic.""" if upstream: self.set_upstream(other) else: @@ -162,7 +162,7 @@ def update_relative( def add_edge_info(self, dag, upstream_id: str, downstream_id: str): """ - Adds or updates task info on the DAG for this specific pair of tasks. + Add or update task info on the DAG for this specific pair of tasks. Called either from our relationship trigger methods above, or directly by set_upstream/set_downstream in operators. @@ -172,5 +172,5 @@ def add_edge_info(self, dag, upstream_id: str, downstream_id: str): # Factory functions def Label(label: str): - """Creates an EdgeModifier that sets a human-readable label on the edge.""" + """Create an EdgeModifier that sets a human-readable label on the edge.""" return EdgeModifier(label=label) diff --git a/airflow/utils/email.py b/airflow/utils/email.py index 860b805e2864..8e139e5b52cd 100644 --- a/airflow/utils/email.py +++ b/airflow/utils/email.py @@ -287,7 +287,7 @@ def send_mime_email( def get_email_address_list(addresses: str | Iterable[str]) -> list[str]: """ - Returns a list of email addresses from the provided input. + Return a list of email addresses from the provided input. :param addresses: A string or iterable of strings containing email addresses. :return: A list of email addresses. @@ -305,7 +305,7 @@ def get_email_address_list(addresses: str | Iterable[str]) -> list[str]: def _get_smtp_connection(host: str, port: int, timeout: int, with_ssl: bool) -> smtplib.SMTP: """ - Returns an SMTP connection to the specified host and port, with optional SSL encryption. + Return an SMTP connection to the specified host and port, with optional SSL encryption. :param host: The hostname or IP address of the SMTP server. :param port: The port number to connect to on the SMTP server. diff --git a/airflow/utils/empty_set.py b/airflow/utils/empty_set.py index 0871c1ec5a12..eb084f9f17e2 100644 --- a/airflow/utils/empty_set.py +++ b/airflow/utils/empty_set.py @@ -20,7 +20,7 @@ def _get_empty_set_for_configuration() -> set[tuple[str, str]]: """ - Retrieves an empty_set_for_configuration. + Retrieve an empty_set_for_configuration. This method is only needed because configuration module has a deprecated method called set, and it confuses mypy. This method will be removed when we remove the deprecated method. 
diff --git a/airflow/utils/event_scheduler.py b/airflow/utils/event_scheduler.py index 33cc2b569086..f234be4ace35 100644 --- a/airflow/utils/event_scheduler.py +++ b/airflow/utils/event_scheduler.py @@ -31,7 +31,7 @@ def call_regular_interval( arguments=(), kwargs={}, ): - """Helper to call a function at (roughly) a given interval.""" + """Call a function at (roughly) a given interval.""" def repeat(*args, **kwargs): action(*args, **kwargs) diff --git a/airflow/utils/file.py b/airflow/utils/file.py index 2b230b61290b..9178fa4af554 100644 --- a/airflow/utils/file.py +++ b/airflow/utils/file.py @@ -119,7 +119,7 @@ def match(path: Path, rules: list[_IgnoreRule]) -> bool: def TemporaryDirectory(*args, **kwargs): - """This function is deprecated. Please use `tempfile.TemporaryDirectory`.""" + """Use `tempfile.TemporaryDirectory`, this function is deprecated.""" import warnings from tempfile import TemporaryDirectory as TmpDir @@ -134,7 +134,7 @@ def TemporaryDirectory(*args, **kwargs): def mkdirs(path, mode): """ - Creates the directory specified by path, creating intermediate directories as necessary. + Create the directory specified by path, creating intermediate directories as necessary. If directory already exists, this is a no-op. @@ -180,7 +180,7 @@ def correct_maybe_zipped(fileloc: None | str | Path) -> None | str | Path: def open_maybe_zipped(fileloc, mode="r"): """ - Opens the given file. + Open the given file. If the path contains a folder with a .zip suffix, then the folder is treated as a zip archive, opening the file inside the archive. @@ -306,7 +306,7 @@ def list_py_file_paths( def find_dag_file_paths(directory: str | os.PathLike[str], safe_mode: bool) -> list[str]: - """Finds file paths of all DAG files.""" + """Find file paths of all DAG files.""" file_paths = [] for file_path in find_path_from_directory(directory, ".airflowignore"): diff --git a/airflow/utils/helpers.py b/airflow/utils/helpers.py index d40728620469..e07608030d42 100644 --- a/airflow/utils/helpers.py +++ b/airflow/utils/helpers.py @@ -48,7 +48,7 @@ def validate_key(k: str, max_length: int = 250): - """Validates value used as a key.""" + """Validate value used as a key.""" if not isinstance(k, str): raise TypeError(f"The key has to be a string and is {type(k)}:{k}") if len(k) > max_length: @@ -61,7 +61,7 @@ def validate_key(k: str, max_length: int = 250): def validate_group_key(k: str, max_length: int = 200): - """Validates value used as a group key.""" + """Validate value used as a group key.""" if not isinstance(k, str): raise TypeError(f"The key has to be a string and is {type(k)}:{k}") if len(k) > max_length: @@ -73,7 +73,7 @@ def validate_group_key(k: str, max_length: int = 200): def alchemy_to_dict(obj: Any) -> dict | None: - """Transforms a SQLAlchemy model instance into a dictionary.""" + """Transform a SQLAlchemy model instance into a dictionary.""" if not obj: return None output = {} @@ -86,7 +86,7 @@ def alchemy_to_dict(obj: Any) -> dict | None: def ask_yesno(question: str, default: bool | None = None) -> bool: - """Helper to get a yes or no answer from the user.""" + """Get a yes or no answer from the user.""" yes = {"yes", "y"} no = {"no", "n"} @@ -162,7 +162,7 @@ def as_flattened_list(iterable: Iterable[Iterable[T]]) -> list[T]: def parse_template_string(template_string: str) -> tuple[str | None, jinja2.Template | None]: - """Parses Jinja template string.""" + """Parse Jinja template string.""" import jinja2 if "{{" in template_string: # jinja mode @@ -195,7 +195,7 @@ def render_log_filename(ti: 
TaskInstance, try_number, filename_template) -> str: def convert_camel_to_snake(camel_str: str) -> str: - """Converts CamelCase to snake_case.""" + """Convert CamelCase to snake_case.""" return CAMELCASE_TO_SNAKE_CASE_REGEX.sub(r"_\1", camel_str).lower() @@ -221,7 +221,7 @@ def partition(pred: Callable[[T], bool], iterable: Iterable[T]) -> tuple[Iterabl def chain(*args, **kwargs): - """This function is deprecated. Please use `airflow.models.baseoperator.chain`.""" + """Use `airflow.models.baseoperator.chain`, this function is deprecated.""" warnings.warn( "This function is deprecated. Please use `airflow.models.baseoperator.chain`.", RemovedInAirflow3Warning, @@ -231,7 +231,7 @@ def chain(*args, **kwargs): def cross_downstream(*args, **kwargs): - """This function is deprecated. Please use `airflow.models.baseoperator.cross_downstream`.""" + """Use `airflow.models.baseoperator.cross_downstream`, this function is deprecated.""" warnings.warn( "This function is deprecated. Please use `airflow.models.baseoperator.cross_downstream`.", RemovedInAirflow3Warning, @@ -296,7 +296,7 @@ def render_template_as_native(template: jinja2.Template, context: Context) -> An def exactly_one(*args) -> bool: """ - Returns True if exactly one of *args is "truthy", and False otherwise. + Return True if exactly one of *args is "truthy", and False otherwise. If user supplies an iterable, we raise ValueError and force them to unpack. """ @@ -309,7 +309,7 @@ def exactly_one(*args) -> bool: def at_most_one(*args) -> bool: """ - Returns True if at most one of *args is "truthy", and False otherwise. + Return True if at most one of *args is "truthy", and False otherwise. NOTSET is treated the same as None. diff --git a/airflow/utils/json.py b/airflow/utils/json.py index 7f05c8778d27..4d89e340c1cd 100644 --- a/airflow/utils/json.py +++ b/airflow/utils/json.py @@ -118,7 +118,7 @@ def object_hook(self, dct: dict) -> object: @staticmethod def orm_object_hook(dct: dict) -> object: - """Creates a readable representation of a serialized object.""" + """Create a readable representation of a serialized object.""" return deserialize(dct, False) diff --git a/airflow/utils/log/file_task_handler.py b/airflow/utils/log/file_task_handler.py index 2a7bd688b5ae..1314cd7ff9b8 100644 --- a/airflow/utils/log/file_task_handler.py +++ b/airflow/utils/log/file_task_handler.py @@ -193,7 +193,7 @@ def set_context(self, ti: TaskInstance) -> None | SetContextPropagate: @staticmethod def add_triggerer_suffix(full_path, job_id=None): """ - Helper for deriving trigger log filename from task log filename. + Derive trigger log filename from task log filename. E.g. given /path/to/file.log returns /path/to/file.log.trigger.123.log, where 123 is the triggerer id. We use the triggerer ID instead of trigger ID to distinguish @@ -219,7 +219,7 @@ def close(self): self.handler.close() def _render_filename(self, ti: TaskInstance, try_number: int) -> str: - """Returns the worker log filename.""" + """Return the worker log filename.""" with create_session() as session: dag_run = ti.get_dagrun(session=session) template = dag_run.get_log_template(session=session).filename diff --git a/airflow/utils/log/log_reader.py b/airflow/utils/log/log_reader.py index d93f15bb1a98..9e751e47b1ab 100644 --- a/airflow/utils/log/log_reader.py +++ b/airflow/utils/log/log_reader.py @@ -41,7 +41,7 @@ def read_log_chunks( self, ti: TaskInstance, try_number: int | None, metadata ) -> tuple[list[tuple[tuple[str, str]]], dict[str, str]]: """ - Reads chunks of Task Instance logs. 
+ Read chunks of Task Instance logs. :param ti: The taskInstance :param try_number: If provided, logs for the given try will be returned. @@ -65,7 +65,7 @@ def read_log_chunks( def read_log_stream(self, ti: TaskInstance, try_number: int | None, metadata: dict) -> Iterator[str]: """ - Used to continuously read log to the end. + Continuously read log to the end. :param ti: The Task Instance :param try_number: the task try number @@ -134,7 +134,7 @@ def render_log_filename( session: Session = NEW_SESSION, ) -> str: """ - Renders the log attachment filename. + Render the log attachment filename. :param ti: The task instance :param try_number: The task try number diff --git a/airflow/utils/log/logging_mixin.py b/airflow/utils/log/logging_mixin.py index 97e1a29ff6e2..9f7d5bc9373c 100644 --- a/airflow/utils/log/logging_mixin.py +++ b/airflow/utils/log/logging_mixin.py @@ -77,12 +77,12 @@ def _get_log(obj: Any, clazz: type[_T]) -> Logger: @classmethod def logger(cls) -> Logger: - """Returns a logger.""" + """Return a logger.""" return LoggingMixin._get_log(cls, cls) @property def log(self) -> Logger: - """Returns a logger.""" + """Return a logger.""" return LoggingMixin._get_log(self, self.__class__) def _set_context(self, context): @@ -171,7 +171,7 @@ def flush(self): def isatty(self): """ - Returns False to indicate the fd is not connected to a tty(-like) device. + Return False to indicate the fd is not connected to a tty(-like) device. For compatibility reasons. """ @@ -218,7 +218,7 @@ def stream(self): def set_context(logger, value): """ - Walks the tree of loggers and tries to set the context for each handler. + Walk the tree of loggers and try to set the context for each handler. :param logger: logger :param value: value to set diff --git a/airflow/utils/log/secrets_masker.py b/airflow/utils/log/secrets_masker.py index c70e469a3c37..efd612c04da2 100644 --- a/airflow/utils/log/secrets_masker.py +++ b/airflow/utils/log/secrets_masker.py @@ -86,7 +86,11 @@ def get_sensitive_variables_fields(): def should_hide_value_for_key(name): - """Should the value for this given name (Variable name, or key in conn.extra_dejson) be hidden.""" + """ + Return if the value for this given name should be hidden. + + Name might be a Variable name, or key in conn.extra_dejson, for example. + """ from airflow import settings if isinstance(name, str) and settings.HIDE_SENSITIVE_VAR_CONN_FIELDS: @@ -313,7 +317,7 @@ def _test_mode(self) -> bool: return conf.getboolean("core", "unit_test_mode") def _adaptations(self, secret: str) -> Generator[str, None, None]: - """Yields the secret along with any adaptations to the secret that should be masked.""" + """Yield the secret along with any adaptations to the secret that should be masked.""" yield secret if self._mask_adapter: diff --git a/airflow/utils/operator_helpers.py b/airflow/utils/operator_helpers.py index 20f272f4f36d..ef1f05e3048e 100644 --- a/airflow/utils/operator_helpers.py +++ b/airflow/utils/operator_helpers.py @@ -199,7 +199,7 @@ def determine_kwargs( def make_kwargs_callable(func: Callable[..., R]) -> Callable[..., R]: """ - Creates a new callable that only forwards necessary arguments from any provided input. + Create a new callable that only forwards necessary arguments from any provided input. Make a new callable that can accept any number of positional or keyword arguments but only forwards those required by the given callable func. 
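# Editor's illustration (not part of the patch): a minimal stand-in for the
# "only forward the arguments the callable actually accepts" behaviour that the
# make_kwargs_callable docstring above describes. The helper and example names
# below are assumptions for this sketch; the real implementation lives in
# airflow/utils/operator_helpers.py.
import inspect
from typing import Any, Callable

def forward_accepted_kwargs(func: Callable[..., Any]) -> Callable[..., Any]:
    sig = inspect.signature(func)

    def wrapper(*args: Any, **kwargs: Any) -> Any:
        # Keep only the keyword arguments that appear in the wrapped function's signature.
        accepted = {key: value for key, value in kwargs.items() if key in sig.parameters}
        return func(*args, **accepted)

    return wrapper

def on_execute(ds: str) -> str:
    return f"run date: {ds}"

# Extra context keys such as "ti" are silently dropped instead of raising TypeError.
print(forward_accepted_kwargs(on_execute)(ds="2023-08-14", ti=None))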
diff --git a/airflow/utils/platform.py b/airflow/utils/platform.py index 72906683f63e..2ee11c548f04 100644 --- a/airflow/utils/platform.py +++ b/airflow/utils/platform.py @@ -53,7 +53,7 @@ def is_terminal_support_colors() -> bool: def get_airflow_git_version(): - """Returns the git commit hash representing the current version of the application.""" + """Return the git commit hash representing the current version of the application.""" git_version = None try: git_version = str(pkgutil.get_data("airflow", "git_version"), encoding="UTF-8") diff --git a/airflow/utils/process_utils.py b/airflow/utils/process_utils.py index 74ad50bf7404..f3104df918cd 100644 --- a/airflow/utils/process_utils.py +++ b/airflow/utils/process_utils.py @@ -299,7 +299,7 @@ def patch_environ(new_env_variables: dict[str, str]) -> Generator[None, None, No def check_if_pidfile_process_is_running(pid_file: str, process_name: str): """ - Checks if a pidfile already exists and process is still running. + Check if a pidfile already exists and process is still running. If process is dead then pidfile is removed. diff --git a/airflow/utils/providers_configuration_loader.py b/airflow/utils/providers_configuration_loader.py index 543a25c05e6c..4aeb93edcb39 100644 --- a/airflow/utils/providers_configuration_loader.py +++ b/airflow/utils/providers_configuration_loader.py @@ -27,8 +27,7 @@ def providers_configuration_loaded(func: Callable[PS, RT]) -> Callable[PS, RT]: """ - Decorator that makes sure that providers configuration is loaded before actually calling - the decorated function. + Make sure that providers configuration is loaded before actually calling the decorated function. ProvidersManager initialization of configuration is relatively inexpensive - it walks through all providers's entrypoints, retrieve the provider_info and loads config yaml parts of the get_info. diff --git a/airflow/utils/python_virtualenv.py b/airflow/utils/python_virtualenv.py index 1adabacafff5..a9f7b5eeb124 100644 --- a/airflow/utils/python_virtualenv.py +++ b/airflow/utils/python_virtualenv.py @@ -68,7 +68,8 @@ def prepare_virtualenv( requirements_file_path: str | None = None, pip_install_options: list[str] | None = None, ) -> str: - """Creates a virtual environment and installs the additional python packages. + """ + Create a virtual environment and install the additional python packages. :param venv_directory: The path for directory where the environment will be created. :param python_bin: Path to the Python executable. @@ -107,7 +108,7 @@ def write_python_script( render_template_as_native_obj: bool = False, ): """ - Renders the python script to a file to execute in the virtual environment. + Render the python script to a file to execute in the virtual environment. :param jinja_context: The jinja context variables to unpack and replace with its placeholders in the template file. diff --git a/airflow/utils/retries.py b/airflow/utils/retries.py index 6adce3eb4377..5b67feec439d 100644 --- a/airflow/utils/retries.py +++ b/airflow/utils/retries.py @@ -59,7 +59,8 @@ def retry_db_transaction(_func: F) -> F: def retry_db_transaction(_func: Callable | None = None, *, retries: int = MAX_DB_RETRIES, **retry_kwargs): - """Decorator to retry functions in case of ``OperationalError`` from DB. + """ + Retry functions in case of ``OperationalError`` from DB. It should not be used with ``@provide_session``. 
""" diff --git a/airflow/utils/serve_logs.py b/airflow/utils/serve_logs.py index bae8a97da93a..0e926ccd8920 100644 --- a/airflow/utils/serve_logs.py +++ b/airflow/utils/serve_logs.py @@ -162,7 +162,7 @@ def load(self): def serve_logs(port=None): - """Serves logs generated by Worker.""" + """Serve logs generated by Worker.""" setproctitle("airflow serve-logs") wsgi_app = create_app() diff --git a/airflow/utils/session.py b/airflow/utils/session.py index 5c7e9eef505c..400cac7375f9 100644 --- a/airflow/utils/session.py +++ b/airflow/utils/session.py @@ -60,7 +60,7 @@ def find_session_idx(func: Callable[PS, RT]) -> int: def provide_session(func: Callable[PS, RT]) -> Callable[PS, RT]: """ - Function decorator that provides a session if it isn't provided. + Provide a session if it isn't provided. If you want to reuse a session or run the function as part of a database transaction, you pass it to the function, if not this wrapper diff --git a/airflow/utils/setup_teardown.py b/airflow/utils/setup_teardown.py index cedd5b7a7a5c..f1de3b4092a0 100644 --- a/airflow/utils/setup_teardown.py +++ b/airflow/utils/setup_teardown.py @@ -181,8 +181,10 @@ def _push_tasks(cls, operator: AbstractOperator | list[AbstractOperator], setup: @classmethod def _update_teardown_downstream(cls, operator: AbstractOperator | list[AbstractOperator]): - """This recursively goes through the tasks downstream of the setup in the context manager, - if found, updates the _teardown_downstream_of_setup accordingly. + """ + Recursively go through the tasks downstream of the setup in the context manager. + + If found, update the _teardown_downstream_of_setup accordingly. """ operator = operator[0] if isinstance(operator, list) else operator @@ -209,8 +211,10 @@ def _get_teardowns(tasks): @classmethod def _update_setup_upstream(cls, operator: AbstractOperator | list[AbstractOperator]): - """This recursively goes through the tasks upstream of the teardown task in the context manager, - if found, updates the _setup_upstream_of_teardown accordingly. + """ + Recursively go through the tasks upstream of the teardown task in the context manager. + + If found, updates the _setup_upstream_of_teardown accordingly. """ operator = operator[0] if isinstance(operator, list) else operator @@ -253,7 +257,7 @@ def set_setup_task_as_roots(cls, roots): @classmethod def set_work_task_roots_and_leaves(cls): - """Sets the work task roots and leaves.""" + """Set the work task roots and leaves.""" if setup_task := cls._context_managed_setup_task: if isinstance(setup_task, list): setup_task = tuple(setup_task) @@ -293,8 +297,7 @@ def set_work_task_roots_and_leaves(cls): @classmethod def set_setup_teardown_relationships(cls): """ - Here we set relationship between setup to setup and - teardown to teardown. + Set relationship between setup to setup and teardown to teardown. code:: python with setuptask >> teardowntask: diff --git a/airflow/utils/sqlalchemy.py b/airflow/utils/sqlalchemy.py index 5690799ce468..38716d4eb5f5 100644 --- a/airflow/utils/sqlalchemy.py +++ b/airflow/utils/sqlalchemy.py @@ -86,7 +86,7 @@ def process_bind_param(self, value, dialect): def process_result_value(self, value, dialect): """ - Processes DateTimes from the DB making sure it is always returning UTC. + Process DateTimes from the DB making sure to always return UTC. Not using timezone.convert_to_utc as that converts to configured TIMEZONE while the DB might be running with some other setting. 
We assume UTC @@ -120,7 +120,7 @@ class ExtendedJSON(TypeDecorator): cache_ok = True def db_supports_json(self): - """Checks if the database supports JSON (i.e. is NOT MSSQL).""" + """Check if the database supports JSON (i.e. is NOT MSSQL).""" return not conf.get("database", "sql_alchemy_conn").startswith("mssql") def load_dialect_impl(self, dialect) -> TypeEngine: @@ -545,7 +545,8 @@ def tuple_in_condition( *, session: Session | None = None, ) -> ColumnOperators: - """Generates a tuple-in-collection operator to use in ``.where()``. + """ + Generate a tuple-in-collection operator to use in ``.where()``. For most SQL backends, this generates a simple ``([col, ...]) IN [condition]`` clause. This however does not work with MSSQL, where we need to expand to @@ -591,7 +592,8 @@ def tuple_not_in_condition( *, session: Session | None = None, ) -> ColumnOperators: - """Generates a tuple-not-in-collection operator to use in ``.where()``. + """ + Generate a tuple-not-in-collection operator to use in ``.where()``. This is similar to ``tuple_in_condition`` except generating ``NOT IN``. diff --git a/airflow/utils/state.py b/airflow/utils/state.py index a0c828ee0747..22fb6e27c814 100644 --- a/airflow/utils/state.py +++ b/airflow/utils/state.py @@ -134,7 +134,7 @@ class State: @classmethod def color(cls, state): - """Returns color for a state.""" + """Return color for a state.""" return cls.state_color.get(state, "white") @classmethod diff --git a/airflow/utils/task_group.py b/airflow/utils/task_group.py index deee607dd17a..fd2d0f727ba1 100644 --- a/airflow/utils/task_group.py +++ b/airflow/utils/task_group.py @@ -269,7 +269,7 @@ def update_relative( self, other: DependencyMixin, upstream: bool = True, edge_modifier: EdgeModifier | None = None ) -> None: """ - Overrides TaskMixin.update_relative. + Override TaskMixin.update_relative. Update upstream_group_ids/downstream_group_ids/upstream_task_ids/downstream_task_ids accordingly so that we can reduce the number of edges when displaying Graph view. @@ -341,7 +341,7 @@ def __exit__(self, _type, _value, _tb): TaskGroupContext.pop_context_managed_task_group() def has_task(self, task: BaseOperator) -> bool: - """Returns True if this TaskGroup or its children TaskGroups contains the given task.""" + """Return True if this TaskGroup or its children TaskGroups contains the given task.""" if task.task_id in self.children: return True @@ -422,7 +422,7 @@ def downstream_join_id(self) -> str: return f"{self.group_id}.downstream_join_id" def get_task_group_dict(self) -> dict[str, TaskGroup]: - """Returns a flat dictionary of group_id: TaskGroup.""" + """Return a flat dictionary of group_id: TaskGroup.""" task_group_map = {} def build_map(task_group): @@ -442,14 +442,14 @@ def get_child_by_label(self, label: str) -> DAGNode: return self.children[self.child_id(label)] def serialize_for_task_group(self) -> tuple[DagAttributeTypes, Any]: - """Required by DAGNode.""" + """Serialize task group; required by DAGNode.""" from airflow.serialization.serialized_objects import TaskGroupSerialization return DagAttributeTypes.TASK_GROUP, TaskGroupSerialization.serialize_task_group(self) def hierarchical_alphabetical_sort(self): """ - Sorts children in hierarchical alphabetical order. + Sort children in hierarchical alphabetical order. - groups in alphabetical order first - tasks in alphabetical order after them. 
@@ -533,7 +533,7 @@ def iter_mapped_task_groups(self) -> Iterator[MappedTaskGroup]: group = group.task_group def iter_tasks(self) -> Iterator[AbstractOperator]: - """Returns an iterator of the child tasks.""" + """Return an iterator of the child tasks.""" from airflow.models.abstractoperator import AbstractOperator groups_to_visit = [self] @@ -577,7 +577,8 @@ def iter_mapped_dependencies(self) -> Iterator[Operator]: @cache def get_parse_time_mapped_ti_count(self) -> int: - """Number of instances a task in this group should be mapped to, when a DAG run is created. + """ + Return the Number of instances a task in this group should be mapped to, when a DAG run is created. This only considers literal mapped arguments, and would return *None* when any non-literal values are used for mapping. @@ -596,7 +597,8 @@ def get_parse_time_mapped_ti_count(self) -> int: ) def get_mapped_ti_count(self, run_id: str, *, session: Session) -> int: - """Number of instances a task in this group should be mapped to at run time. + """ + Return the number of instances a task in this group should be mapped to at run time. This considers both literal and non-literal mapped arguments, and the result is therefore available when all depended tasks have finished. The diff --git a/airflow/utils/timeout.py b/airflow/utils/timeout.py index 06c4ba1de769..59330e7c0784 100644 --- a/airflow/utils/timeout.py +++ b/airflow/utils/timeout.py @@ -39,7 +39,7 @@ def __init__(self, seconds=1, error_message="Timeout"): self.error_message = error_message + ", PID: " + str(os.getpid()) def handle_timeout(self, *args): - """Logs information and raises AirflowTaskTimeout.""" + """Log information and raises AirflowTaskTimeout.""" self.log.error("Process timed out, PID: %s", str(os.getpid())) raise AirflowTaskTimeout(self.error_message) @@ -64,7 +64,7 @@ def __init__(self, seconds=1, error_message="Timeout"): self.error_message = error_message + ", PID: " + str(os.getpid()) def handle_timeout(self, signum, frame): - """Logs information and raises AirflowTaskTimeout.""" + """Log information and raises AirflowTaskTimeout.""" self.log.error("Process timed out, PID: %s", str(os.getpid())) raise AirflowTaskTimeout(self.error_message) diff --git a/airflow/utils/timezone.py b/airflow/utils/timezone.py index 1db15ac61219..f7116cd81f98 100644 --- a/airflow/utils/timezone.py +++ b/airflow/utils/timezone.py @@ -64,7 +64,7 @@ def utcnow() -> dt.datetime: def utc_epoch() -> dt.datetime: - """Gets the epoch in the users timezone.""" + """Get the epoch in the user's timezone.""" # pendulum utcnow() is not used as that sets a TimezoneInfo object # instead of a Timezone. This is not picklable and also creates issues # when using replace() @@ -85,7 +85,7 @@ def convert_to_utc(value: dt.datetime) -> DateTime: def convert_to_utc(value: dt.datetime | None) -> DateTime | None: - """Creates a datetime with the default timezone added if none is associated. + """Create a datetime with the default timezone added if none is associated. :param value: datetime :return: datetime with tzinfo @@ -182,7 +182,7 @@ def make_naive(value, timezone=None): def datetime(*args, **kwargs): """ - Wrapper around datetime.datetime that adds settings.TIMEZONE if tzinfo not specified. + Wrap around datetime.datetime to add settings.TIMEZONE if tzinfo not specified. 
:return: datetime.datetime """ diff --git a/airflow/utils/trigger_rule.py b/airflow/utils/trigger_rule.py index dabc9f94bd16..6ec37cba8187 100644 --- a/airflow/utils/trigger_rule.py +++ b/airflow/utils/trigger_rule.py @@ -40,12 +40,12 @@ class TriggerRule(str, Enum): @classmethod def is_valid(cls, trigger_rule: str) -> bool: - """Validates a trigger rule.""" + """Validate a trigger rule.""" return trigger_rule in cls.all_triggers() @classmethod def all_triggers(cls) -> set[str]: - """Returns all trigger rules.""" + """Return all trigger rules.""" return set(cls.__members__.values()) def __str__(self) -> str: diff --git a/airflow/utils/types.py b/airflow/utils/types.py index 0eab9b3b8785..32a0bda6bbb9 100644 --- a/airflow/utils/types.py +++ b/airflow/utils/types.py @@ -60,7 +60,7 @@ def generate_run_id(self, logical_date: datetime) -> str: @staticmethod def from_run_id(run_id: str) -> DagRunType: - """Resolved DagRun type from run_id.""" + """Resolve DagRun type from run_id.""" for run_type in DagRunType: if run_id and run_id.startswith(f"{run_type.value}__"): return run_type diff --git a/airflow/utils/weekday.py b/airflow/utils/weekday.py index cb404628c133..1e6f698f69c9 100644 --- a/airflow/utils/weekday.py +++ b/airflow/utils/weekday.py @@ -50,7 +50,7 @@ def get_weekday_number(cls, week_day_str: str): @classmethod def convert(cls, day: str | WeekDay) -> int: - """Helper function that returns the day number in the week.""" + """Return the day number in the week.""" if isinstance(day, WeekDay): return day return cls.get_weekday_number(week_day_str=day) diff --git a/airflow/utils/weight_rule.py b/airflow/utils/weight_rule.py index 002a81f4d589..f65f2fa77e1a 100644 --- a/airflow/utils/weight_rule.py +++ b/airflow/utils/weight_rule.py @@ -37,7 +37,7 @@ def is_valid(cls, weight_rule: str) -> bool: @classmethod @cache def all_weight_rules(cls) -> set[str]: - """Returns all weight rules.""" + """Return all weight rules.""" return set(cls.__members__.values()) def __str__(self) -> str: From 14c91db04a77b6feb0212a5a97de18531070f2d1 Mon Sep 17 00:00:00 2001 From: "D. Ferruzzi" Date: Sun, 13 Aug 2023 16:56:43 -0700 Subject: [PATCH 043/117] D401 Support - Root Files (#33352) (cherry picked from commit 5e1e5fa637aa90b92e4b0e1d62c577461ea92369) --- airflow/__main__.py | 1 - airflow/configuration.py | 45 ++++++++++++++++++----------------- airflow/policies.py | 8 +++---- airflow/sentry.py | 4 ++-- airflow/settings.py | 4 ++-- helm_tests/other/test_keda.py | 2 +- setup.py | 16 ++++++------- 7 files changed, 40 insertions(+), 40 deletions(-) diff --git a/airflow/__main__.py b/airflow/__main__.py index 20af0ac2744a..e49f7e9bdf6a 100644 --- a/airflow/__main__.py +++ b/airflow/__main__.py @@ -38,7 +38,6 @@ def main(): - """Main executable function.""" conf = configuration.conf if conf.get("core", "security") == "kerberos": os.environ["KRB5CCNAME"] = conf.get("kerberos", "ccache") diff --git a/airflow/configuration.py b/airflow/configuration.py index 2da495ba0e9e..22cb7fa04432 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -88,7 +88,7 @@ def expand_env_var(env_var: str) -> str: def expand_env_var(env_var: str | None) -> str | None: """ - Expands (potentially nested) env vars. + Expand (potentially nested) env vars. Repeat and apply `expandvars` and `expanduser` until interpolation stops having any effect. 
@@ -104,7 +104,7 @@ def expand_env_var(env_var: str | None) -> str | None: def run_command(command: str) -> str: - """Runs command and returns stdout.""" + """Run command and returns stdout.""" process = subprocess.Popen( shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True ) @@ -239,7 +239,7 @@ def is_template(self, section: str, key) -> bool: def _update_defaults_from_string(self, config_string: str): """ - The defaults in _default_values are updated based on values in config_string ("ini" format). + Update the defaults in _default_values based on values in config_string ("ini" format). Note that those values are not validated and cannot contain variables because we are using regular config parser to load them. This method is used to test the config parser in unit tests. @@ -268,7 +268,7 @@ def _update_defaults_from_string(self, config_string: str): def get_default_value(self, section: str, key: str, fallback: Any = None, raw=False, **kwargs) -> Any: """ - Retrieves default value from default config parser. + Retrieve default value from default config parser. This will retrieve the default value from the default config parser. Optionally a raw, stored value can be retrieved by setting skip_interpolation to True. This is useful for example when @@ -470,7 +470,7 @@ def inversed_deprecated_sections(self): def get_sections_including_defaults(self) -> list[str]: """ - Retrieves all sections from the configuration parser, including sections defined by built-in defaults. + Retrieve all sections from the configuration parser, including sections defined by built-in defaults. :return: list of section names """ @@ -478,7 +478,7 @@ def get_sections_including_defaults(self) -> list[str]: def get_options_including_defaults(self, section: str) -> list[str]: """ - Retrieves all possible option from the configuration parser for the section given. + Retrieve all possible option from the configuration parser for the section given. Includes options defined by built-in defaults. @@ -490,7 +490,7 @@ def get_options_including_defaults(self, section: str) -> list[str]: def optionxform(self, optionstr: str) -> str: """ - This method transforms option names on every read, get, or set operation. + Transform option names on every read, get, or set operation. This changes from the default behaviour of ConfigParser from lower-casing to instead be case-preserving. @@ -532,7 +532,7 @@ def _write_section_header( section_config_description: dict[str, str], section_to_write: str, ) -> None: - """Writes header for configuration section.""" + """Write header for configuration section.""" file.write(f"[{section_to_write}]\n") section_description = section_config_description.get("description") if section_description and include_descriptions: @@ -553,7 +553,8 @@ def _write_option_header( section_to_write: str, sources_dict: ConfigSourcesType, ) -> tuple[bool, bool]: - """Writes header for configuration option. + """ + Write header for configuration option. Returns tuple of (should_continue, needs_separation) where needs_separation should be set if the option needs additional separation to visually separate it from the next option. @@ -641,7 +642,7 @@ def write( # type: ignore[override] **kwargs: Any, ) -> None: """ - Writes configuration with comments and examples to a file. + Write configuration with comments and examples to a file. 
:param file: file to write to :param section: section of the config to write, defaults to all sections @@ -1191,7 +1192,7 @@ def getfloat(self, section: str, key: str, **kwargs) -> float: # type: ignore[o def getimport(self, section: str, key: str, **kwargs) -> Any: """ - Reads options, imports the full qualified name, and returns the object. + Read options, import the full qualified name, and return the object. In case of failure, it throws an exception with the key and section names @@ -1235,7 +1236,7 @@ def gettimedelta( self, section: str, key: str, fallback: Any = None, **kwargs ) -> datetime.timedelta | None: """ - Gets the config value for the given section and key, and converts it into datetime.timedelta object. + Get the config value for the given section and key, and convert it into datetime.timedelta object. If the key is missing, then it is considered as `None`. @@ -1321,7 +1322,7 @@ def remove_option(self, section: str, option: str, remove_default: bool = True): def getsection(self, section: str) -> ConfigOptionsDictType | None: """ - Returns the section as a dict. + Return the section as a dict. Values are converted to int, float, bool as required. @@ -1374,7 +1375,7 @@ def as_dict( include_secret: bool = True, ) -> ConfigSourcesType: """ - Returns the current configuration as an OrderedDict of OrderedDicts. + Return the current configuration as an OrderedDict of OrderedDicts. When materializing current configuration Airflow defaults are materialized along with user set configs. If any of the `include_*` @@ -1556,7 +1557,7 @@ def _filter_by_source( getter_func, ): """ - Deletes default configs from current configuration. + Delete default configs from current configuration. An OrderedDict of OrderedDicts, if it would conflict with special sensitive_config_values. @@ -1761,7 +1762,7 @@ def _replace_section_config_with_display_sources( def load_test_config(self): """ - Uses test configuration rather than the configuration coming from airflow defaults. + Use test configuration rather than the configuration coming from airflow defaults. When running tests we use special the unit_test configuration to avoid accidental modifications and different behaviours when running the tests. Values for those test configuration are stored in @@ -1785,7 +1786,7 @@ def load_test_config(self): log.info("Unit test configuration loaded from 'config_unit_tests.cfg'") def expand_all_configuration_values(self): - """Expands all configuration values using global and local variables defined in this module.""" + """Expand all configuration values using global and local variables defined in this module.""" all_vars = get_all_expansion_variables() for section in self.sections(): for key, value in self.items(section): @@ -1798,7 +1799,7 @@ def expand_all_configuration_values(self): self.set(section, key, value.format(**all_vars)) def remove_all_read_configurations(self): - """Removes all read configurations, leaving only default values in the config.""" + """Remove all read configurations, leaving only default values in the config.""" for section in self.sections(): self.remove_section(section) @@ -1809,7 +1810,7 @@ def providers_configuration_loaded(self) -> bool: def load_providers_configuration(self): """ - Loads configuration for providers. + Load configuration for providers. This should be done after initial configuration have been performed. 
Initializing and discovering providers is an expensive operation and cannot be performed when we load configuration for the first @@ -1920,7 +1921,7 @@ def _generate_fernet_key() -> str: def create_default_config_parser(configuration_description: dict[str, dict[str, Any]]) -> ConfigParser: """ - Creates default config parser based on configuration description. + Create default config parser based on configuration description. It creates ConfigParser with all default values retrieved from the configuration description and expands all the variables from the global and local variables defined in this module. @@ -1947,7 +1948,7 @@ def create_default_config_parser(configuration_description: dict[str, dict[str, def create_pre_2_7_defaults() -> ConfigParser: """ - Creates parser using the old defaults from Airflow < 2.7.0. + Create parser using the old defaults from Airflow < 2.7.0. This is used in order to be able to fall-back to those defaults when old version of provider, not supporting "config contribution" is installed with Airflow 2.7.0+. This "default" @@ -1985,7 +1986,7 @@ def write_default_airflow_configuration_if_needed() -> AirflowConfigParser: def load_standard_airflow_configuration(airflow_config_parser: AirflowConfigParser): """ - Loads standard airflow configuration. + Load standard airflow configuration. In case it finds that the configuration file is missing, it will create it and write the default configuration values there, based on defaults passed, and will add the comments and examples diff --git a/airflow/policies.py b/airflow/policies.py index 47c3dffcb22d..f37703fe2a25 100644 --- a/airflow/policies.py +++ b/airflow/policies.py @@ -35,7 +35,7 @@ @local_settings_hookspec def task_policy(task: BaseOperator) -> None: """ - This policy setting allows altering tasks after they are loaded in the DagBag. + Allow altering tasks after they are loaded in the DagBag. It allows administrator to rewire some task's parameters. Alternatively you can raise ``AirflowClusterPolicyViolation`` exception to stop DAG from being executed. @@ -53,7 +53,7 @@ def task_policy(task: BaseOperator) -> None: @local_settings_hookspec def dag_policy(dag: DAG) -> None: """ - This policy setting allows altering DAGs after they are loaded in the DagBag. + Allow altering DAGs after they are loaded in the DagBag. It allows administrator to rewire some DAG's parameters. Alternatively you can raise ``AirflowClusterPolicyViolation`` exception @@ -71,7 +71,7 @@ def dag_policy(dag: DAG) -> None: @local_settings_hookspec def task_instance_mutation_hook(task_instance: TaskInstance) -> None: """ - This setting allows altering task instances before being queued by the Airflow scheduler. + Allow altering task instances before being queued by the Airflow scheduler. This could be used, for instance, to modify the task instance during retries. @@ -108,7 +108,7 @@ def get_airflow_context_vars(context) -> dict[str, str]: # type: ignore[empty-b @local_settings_hookspec(firstresult=True) def get_dagbag_import_timeout(dag_file_path: str) -> int | float: # type: ignore[empty-body] """ - This setting allows for dynamic control of the DAG file parsing timeout based on the DAG file path. + Allow for dynamic control of the DAG file parsing timeout based on the DAG file path. It is useful when there are a few DAG files requiring longer parsing times, while others do not. You can control them separately instead of having one value for all DAG files. 
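# Editor's illustration (not part of the patch): one way the get_dagbag_import_timeout
# hook described in the policies.py docstring above can be supplied from a deployment's
# airflow_local_settings.py. The folder name and timeout values are assumptions made up
# for this sketch.
def get_dagbag_import_timeout(dag_file_path: str) -> float:
    # Give known-heavy DAG files more parsing time than the rest.
    if "slow_dags" in dag_file_path:
        return 90.0
    return 30.0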
diff --git a/airflow/sentry.py b/airflow/sentry.py index 443063af8a62..34133228ca13 100644 --- a/airflow/sentry.py +++ b/airflow/sentry.py @@ -122,7 +122,7 @@ def __init__(self): sentry_sdk.init(integrations=integrations, **sentry_config_opts) def add_tagging(self, task_instance): - """Function to add tagging for a task_instance.""" + """Add tagging for a task_instance.""" dag_run = task_instance.dag_run task = task_instance.task @@ -141,7 +141,7 @@ def add_breadcrumbs( task_instance: TaskInstance, session: Session | None = None, ) -> None: - """Function to add breadcrumbs inside of a task_instance.""" + """Add breadcrumbs inside of a task_instance.""" if session is None: return dr = task_instance.get_dagrun(session) diff --git a/airflow/settings.py b/airflow/settings.py index 8e18cfcec72b..4ba9e80bd760 100644 --- a/airflow/settings.py +++ b/airflow/settings.py @@ -118,7 +118,7 @@ def _get_rich_console(file): def custom_show_warning(message, category, filename, lineno, file=None, line=None): - """Custom function to print rich and visible warnings.""" + """Print rich and visible warnings.""" # Delay imports until we need it from rich.markup import escape @@ -432,7 +432,7 @@ def prepare_syspath(): def get_session_lifetime_config(): - """Gets session timeout configs and handles outdated configs gracefully.""" + """Get session timeout configs and handle outdated configs gracefully.""" session_lifetime_minutes = conf.get("webserver", "session_lifetime_minutes", fallback=None) session_lifetime_days = conf.get("webserver", "session_lifetime_days", fallback=None) uses_deprecated_lifetime_configs = session_lifetime_days or conf.get( diff --git a/helm_tests/other/test_keda.py b/helm_tests/other/test_keda.py index 5fd6c14f8e57..cd5dd5b6cf8b 100644 --- a/helm_tests/other/test_keda.py +++ b/helm_tests/other/test_keda.py @@ -85,7 +85,7 @@ def test_keda_advanced(self, executor): @staticmethod def build_query(executor, concurrency=16, queue=None): - """Builds the query used by KEDA autoscaler to determine how many workers there should be.""" + """Build the query used by KEDA autoscaler to determine how many workers there should be.""" query = ( f"SELECT ceil(COUNT(*)::decimal / {concurrency}) " "FROM task_instance WHERE (state='running' OR state='queued')" diff --git a/setup.py b/setup.py index ad1ca9d44175..9a2daded8e79 100644 --- a/setup.py +++ b/setup.py @@ -704,7 +704,7 @@ def get_all_db_dependencies() -> list[str]: def is_package_excluded(package: str, exclusion_list: list[str]) -> bool: """ - Checks if package should be excluded. + Check if package should be excluded. :param package: package name (beginning of it) :param exclusion_list: list of excluded packages @@ -715,7 +715,7 @@ def is_package_excluded(package: str, exclusion_list: list[str]) -> bool: def remove_provider_limits(package: str) -> str: """ - Removes the limit for providers in devel_all to account for pre-release and development packages. + Remove the limit for providers in devel_all to account for pre-release and development packages. :param package: package name (beginning of it) :return: true if package should be excluded @@ -749,7 +749,7 @@ def remove_provider_limits(package: str) -> str: def sort_extras_dependencies() -> dict[str, list[str]]: """ - The dictionary order remains when keys() are retrieved. + Sort dependencies; the dictionary order remains when keys() are retrieved. 
Sort both: extras and list of dependencies to make it easier to analyse problems external packages will be first, then if providers are added they are added at the end of the lists. @@ -777,7 +777,7 @@ def sort_extras_dependencies() -> dict[str, list[str]]: def get_provider_package_name_from_package_id(package_id: str) -> str: """ - Builds the name of provider package out of the package id provided/. + Build the name of provider package out of the package id provided. :param package_id: id of the package (like amazon or microsoft.azure) :return: full name of package in PyPI @@ -796,12 +796,12 @@ def get_provider_package_name_from_package_id(package_id: str) -> str: def get_excluded_providers() -> list[str]: - """Returns packages excluded for the current python version.""" + """Return packages excluded for the current python version.""" return [] def get_all_provider_packages() -> str: - """Returns all provider packages configured in setup.py.""" + """Return all provider packages configured in setup.py.""" excluded_providers = get_excluded_providers() return " ".join( get_provider_package_name_from_package_id(package) @@ -844,7 +844,7 @@ def parse_config_files(self, *args, **kwargs) -> None: def replace_extra_dependencies_with_provider_packages(extra: str, providers: list[str]) -> None: """ - Replaces extra dependencies with provider package. + Replace extra dependencies with provider package. The intention here is that when the provider is added as dependency of extra, there is no need to add the dependencies separately. This is not needed and even harmful, because in @@ -897,7 +897,7 @@ def replace_extra_dependencies_with_provider_packages(extra: str, providers: lis def add_provider_packages_to_extra_dependencies(extra: str, providers: list[str]) -> None: """ - Adds provider packages as dependencies to extra. + Add provider packages as dependencies to extra. This is used to add provider packages as dependencies to the "bulk" kind of extras. 
Those bulk extras do not have the detailed 'extra' dependencies as initial values, From f44a3246d127a1ad37b1c80035afe00db7785c8c Mon Sep 17 00:00:00 2001 From: Pierre Jeambrun Date: Mon, 14 Aug 2023 22:45:47 +0200 Subject: [PATCH 044/117] Fix DagFileProcessor interfering with dags outside its ``processor_subdir`` (#33357) * Fix standalone DagProcessor interfering with DAG outsite of its subdir * Add tests * Update code review (cherry picked from commit 35b18306a4928152fd1834964fc8ce0033811817) --- airflow/dag_processing/manager.py | 12 ++++++--- airflow/models/dag.py | 13 ++++++++- airflow/models/dagcode.py | 14 ++++++++-- airflow/models/serialized_dag.py | 1 + tests/dag_processing/test_job_runner.py | 35 ++++++++++++++++++++++++- tests/models/test_dag.py | 7 +++-- 6 files changed, 72 insertions(+), 10 deletions(-) diff --git a/airflow/dag_processing/manager.py b/airflow/dag_processing/manager.py index 534763843791..5a92af893bd8 100644 --- a/airflow/dag_processing/manager.py +++ b/airflow/dag_processing/manager.py @@ -218,7 +218,6 @@ def _run_processor_manager( pickle_dags: bool, async_mode: bool, ) -> None: - # Make this process start as a new process group - that makes it easy # to kill all sub-process of this at the OS-level, rather than having # to iterate the child processes @@ -793,8 +792,14 @@ def _iter_dag_filelocs(fileloc: str) -> Iterator[str]: alive_dag_filelocs=dag_filelocs, processor_subdir=self.get_dag_directory(), ) - DagModel.deactivate_deleted_dags(dag_filelocs) - DagCode.remove_deleted_code(dag_filelocs) + DagModel.deactivate_deleted_dags( + dag_filelocs, + processor_subdir=self.get_dag_directory(), + ) + DagCode.remove_deleted_code( + dag_filelocs, + processor_subdir=self.get_dag_directory(), + ) return True return False @@ -1133,7 +1138,6 @@ def prepare_file_path_queue(self): file_paths_recently_processed = [] file_paths_to_stop_watching = set() for file_path in self._file_paths: - if is_mtime_mode: try: files_with_mtime[file_path] = os.path.getmtime(file_path) diff --git a/airflow/models/dag.py b/airflow/models/dag.py index 36555ce10fc8..beb7fe013152 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -3565,16 +3565,27 @@ def set_is_paused(self, is_paused: bool, including_subdags: bool = True, session def deactivate_deleted_dags( cls, alive_dag_filelocs: Container[str], + processor_subdir: str, session: Session = NEW_SESSION, ) -> None: """ Set ``is_active=False`` on the DAGs for which the DAG files have been removed. 
:param alive_dag_filelocs: file paths of alive DAGs + :param processor_subdir: dag processor subdir :param session: ORM Session """ log.debug("Deactivating DAGs (for which DAG files are deleted) from %s table ", cls.__tablename__) - dag_models = session.scalars(select(cls).where(cls.fileloc.is_not(None))) + dag_models = session.scalars( + select(cls).where( + cls.fileloc.is_not(None), + or_( + cls.processor_subdir.is_(None), + cls.processor_subdir == processor_subdir, + ), + ) + ) + for dag_model in dag_models: if dag_model.fileloc not in alive_dag_filelocs: dag_model.is_active = False diff --git a/airflow/models/dagcode.py b/airflow/models/dagcode.py index 206c97e08f29..df04ab177385 100644 --- a/airflow/models/dagcode.py +++ b/airflow/models/dagcode.py @@ -125,10 +125,16 @@ def bulk_sync_to_db(cls, filelocs: Iterable[str], session: Session = NEW_SESSION @classmethod @provide_session - def remove_deleted_code(cls, alive_dag_filelocs: Collection[str], session: Session = NEW_SESSION) -> None: + def remove_deleted_code( + cls, + alive_dag_filelocs: Collection[str], + processor_subdir: str, + session: Session = NEW_SESSION, + ) -> None: """Delete code not included in alive_dag_filelocs. :param alive_dag_filelocs: file paths of alive DAGs + :param processor_subdir: dag processor subdir :param session: ORM Session """ alive_fileloc_hashes = [cls.dag_fileloc_hash(fileloc) for fileloc in alive_dag_filelocs] @@ -137,7 +143,11 @@ def remove_deleted_code(cls, alive_dag_filelocs: Collection[str], session: Sessi session.execute( delete(cls) - .where(cls.fileloc_hash.notin_(alive_fileloc_hashes), cls.fileloc.notin_(alive_dag_filelocs)) + .where( + cls.fileloc_hash.notin_(alive_fileloc_hashes), + cls.fileloc.notin_(alive_dag_filelocs), + cls.fileloc.contains(processor_subdir), + ) .execution_options(synchronize_session="fetch") ) diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index 62a56cf7734e..23e8c59302c9 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -242,6 +242,7 @@ def remove_deleted_dags( """Delete DAGs not included in alive_dag_filelocs. 
:param alive_dag_filelocs: file paths of alive DAGs + :param processor_subdir: dag processor subdir :param session: ORM Session """ alive_fileloc_hashes = [DagCode.dag_fileloc_hash(fileloc) for fileloc in alive_dag_filelocs] diff --git a/tests/dag_processing/test_job_runner.py b/tests/dag_processing/test_job_runner.py index 9a83b9feb209..e300eb986615 100644 --- a/tests/dag_processing/test_job_runner.py +++ b/tests/dag_processing/test_job_runner.py @@ -188,7 +188,6 @@ def test_remove_file_clears_import_error(self, tmpdir): @conf_vars({("core", "load_examples"): "False"}) def test_max_runs_when_no_files(self): - child_pipe, parent_pipe = multiprocessing.Pipe() with TemporaryDirectory(prefix="empty-airflow-dags-") as dags_folder: @@ -1001,6 +1000,40 @@ def test_refresh_dags_dir_deactivates_deleted_zipped_dags(self, tmpdir): # assert dag deactivated assert not dag.get_is_active() + def test_refresh_dags_dir_does_not_interfer_with_dags_outside_its_subdir(self, tmpdir): + """Test DagProcessorJobRunner._refresh_dag_dir should not update dags outside its processor_subdir""" + + dagbag = DagBag(dag_folder=tmpdir, include_examples=False) + dag_path = os.path.join(TEST_DAGS_FOLDER, "test_miscellaneous.py") + dagbag.process_file(dag_path) + dag = dagbag.get_dag("miscellaneous_test_dag") + dag.sync_to_db(processor_subdir=str(TEST_DAG_FOLDER)) + SerializedDagModel.write_dag(dag, processor_subdir=str(TEST_DAG_FOLDER)) + + assert SerializedDagModel.has_dag("miscellaneous_test_dag") + assert dag.get_is_active() + assert DagCode.has_dag(dag.fileloc) + + manager = DagProcessorJobRunner( + job=Job(), + processor=DagFileProcessorManager( + dag_directory=TEST_DAG_FOLDER / "subdir2" / "subdir3", + max_runs=1, + processor_timeout=timedelta(days=365), + signal_conn=MagicMock(), + dag_ids=[], + pickle_dags=False, + async_mode=True, + ), + ) + manager.processor.last_dag_dir_refresh_time = timezone.utcnow() - timedelta(minutes=10) + + manager.processor._refresh_dag_dir() + + assert SerializedDagModel.has_dag("miscellaneous_test_dag") + assert dag.get_is_active() + assert DagCode.has_dag(dag.fileloc) + @conf_vars( { ("core", "load_examples"): "False", diff --git a/tests/models/test_dag.py b/tests/models/test_dag.py index b6827fa541c9..88b8f238dff0 100644 --- a/tests/models/test_dag.py +++ b/tests/models/test_dag.py @@ -1303,14 +1303,17 @@ def test_dag_is_deactivated_upon_dagfile_deletion(self): dag.fileloc = dag_fileloc session = settings.Session() with mock.patch("airflow.models.dag.DagCode.bulk_sync_to_db"): - dag.sync_to_db(session=session) + dag.sync_to_db(session=session, processor_subdir="/usr/local/airflow/dags/") orm_dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).one() assert orm_dag.is_active assert orm_dag.fileloc == dag_fileloc - DagModel.deactivate_deleted_dags(list_py_file_paths(settings.DAGS_FOLDER)) + DagModel.deactivate_deleted_dags( + list_py_file_paths(settings.DAGS_FOLDER), + processor_subdir="/usr/local/airflow/dags/", + ) orm_dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).one() assert not orm_dag.is_active From c3580fc4dd732f60c59cb6df79101ab9e64958ba Mon Sep 17 00:00:00 2001 From: Andreas Albert <103571926+AndreasAlbertQC@users.noreply.github.com> Date: Mon, 14 Aug 2023 22:45:51 +0200 Subject: [PATCH 045/117] Fix config description for base_log_folder (#33388) (cherry picked from commit 08565dcb8b296609a22955e79291ae9dfb51b5bb) --- airflow/config_templates/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index b9ea0c0e3f6f..ec386f7158d3 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -632,7 +632,7 @@ logging: This path must be absolute. There are a few existing configurations that assume this is set to the default. If you choose to override this you may need to update the dag_processor_manager_log_location and - dag_processor_manager_log_location settings as well. + child_process_log_directory settings as well. version_added: 2.0.0 type: string example: ~ From 08d4e0168cecae7b93e79deb91cb2bcbd434cf20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Sun, 20 Aug 2023 18:02:34 +0000 Subject: [PATCH 046/117] Simplify 'X for X in Y' to 'Y' where applicable (#33453) (cherry picked from commit 7700fb12cc6c7a97901662e6ac6aa1e4e932d969) --- airflow/lineage/__init__.py | 2 +- airflow/models/dagrun.py | 2 +- airflow/providers/apache/hive/hooks/hive.py | 2 +- airflow/providers/microsoft/azure/operators/batch.py | 2 +- airflow/providers/smtp/hooks/smtp.py | 2 +- airflow/utils/email.py | 2 +- airflow/www/views.py | 2 +- dev/airflow-license | 2 +- docker_tests/test_prod_image.py | 1 - scripts/ci/pre_commit/common_precommit_utils.py | 3 +-- tests/always/test_connection.py | 6 +++--- tests/conftest.py | 2 +- tests/models/test_mappedoperator.py | 2 +- tests/models/test_skipmixin.py | 2 +- tests/providers/amazon/aws/sensors/test_eks.py | 10 +++------- .../google/cloud/log/test_stackdriver_task_handler.py | 2 +- .../google/cloud/transfers/test_sql_to_gcs.py | 4 +--- .../triggers/test_cloud_storage_transfer_service.py | 2 +- tests/sensors/test_external_task_sensor.py | 8 ++++---- tests/system/providers/amazon/aws/example_s3_to_sql.py | 2 +- 20 files changed, 26 insertions(+), 34 deletions(-) diff --git a/airflow/lineage/__init__.py b/airflow/lineage/__init__.py index e22f264fdb4e..a2fcdf4ed5cd 100644 --- a/airflow/lineage/__init__.py +++ b/airflow/lineage/__init__.py @@ -142,7 +142,7 @@ def wrapper(self, context, *args, **kwargs): _inlets = self.xcom_pull( context, task_ids=task_ids, dag_id=self.dag_id, key=PIPELINE_OUTLETS, session=session ) - self.inlets.extend(i for i in itertools.chain.from_iterable(_inlets)) + self.inlets.extend(itertools.chain.from_iterable(_inlets)) elif self.inlets: raise AttributeError("inlets is not a list, operator, string or attr annotated object") diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index 923d6f3d8af1..99e517656a12 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -1238,7 +1238,7 @@ def _revise_map_indexes_if_mapped(self, task: Operator, *, session: Session) -> TI.run_id == self.run_id, ) ) - existing_indexes = {i for i in query} + existing_indexes = set(query) removed_indexes = existing_indexes.difference(range(total_length)) if removed_indexes: diff --git a/airflow/providers/apache/hive/hooks/hive.py b/airflow/providers/apache/hive/hooks/hive.py index 8e81592f7911..debe80e29fd0 100644 --- a/airflow/providers/apache/hive/hooks/hive.py +++ b/airflow/providers/apache/hive/hooks/hive.py @@ -235,7 +235,7 @@ def run_cli( invalid_chars_list = re.findall(r"[^a-z0-9_]", schema) if invalid_chars_list: - invalid_chars = "".join(char for char in invalid_chars_list) + invalid_chars = "".join(invalid_chars_list) raise RuntimeError(f"The schema `{schema}` contains invalid characters: {invalid_chars}") if schema: diff --git 
a/airflow/providers/microsoft/azure/operators/batch.py b/airflow/providers/microsoft/azure/operators/batch.py index bb93c3b5ad8e..63b925a98199 100644 --- a/airflow/providers/microsoft/azure/operators/batch.py +++ b/airflow/providers/microsoft/azure/operators/batch.py @@ -189,7 +189,7 @@ def _check_inputs(self) -> Any: ) if self.use_latest_image: - if not all(elem for elem in [self.vm_publisher, self.vm_offer]): + if not self.vm_publisher or not self.vm_offer: raise AirflowException( f"If use_latest_image_and_sku is set to True then the parameters vm_publisher, " f"vm_offer, must all be set. " diff --git a/airflow/providers/smtp/hooks/smtp.py b/airflow/providers/smtp/hooks/smtp.py index c196db1680f0..74df15d67667 100644 --- a/airflow/providers/smtp/hooks/smtp.py +++ b/airflow/providers/smtp/hooks/smtp.py @@ -329,7 +329,7 @@ def _get_email_list_from_str(self, addresses: str) -> list[str]: :return: A list of email addresses. """ pattern = r"\s*[,;]\s*" - return [address for address in re.split(pattern, addresses)] + return re.split(pattern, addresses) @property def conn(self) -> Connection: diff --git a/airflow/utils/email.py b/airflow/utils/email.py index 8e139e5b52cd..2957e5e1d3f6 100644 --- a/airflow/utils/email.py +++ b/airflow/utils/email.py @@ -340,4 +340,4 @@ def _get_email_list_from_str(addresses: str) -> list[str]: :return: A list of email addresses. """ pattern = r"\s*[,;]\s*" - return [address for address in re2.split(pattern, addresses)] + return re2.split(pattern, addresses) diff --git a/airflow/www/views.py b/airflow/www/views.py index 8838785f4550..683fabc77f45 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -2707,7 +2707,7 @@ def confirm(self): return redirect_or_json( origin, msg=f"TaskGroup {group_id} could not be found", status="error", status_code=404 ) - tasks = [task for task in task_group.iter_tasks()] + tasks = list(task_group.iter_tasks()) elif task_id: try: task = dag.get_task(task_id) diff --git a/dev/airflow-license b/dev/airflow-license index 0f9cc7cfbb9d..2144f54b8771 100755 --- a/dev/airflow-license +++ b/dev/airflow-license @@ -79,5 +79,5 @@ if __name__ == "__main__": license = parse_license_file(notice[1]) print(f"{notice[1]:<30}|{notice[2][:50]:<50}||{notice[0]:<20}||{license:<10}") - file_count = len([name for name in os.listdir("../licenses")]) + file_count = len(os.listdir("../licenses")) print(f"Defined licenses: {len(notices)} Files found: {file_count}") diff --git a/docker_tests/test_prod_image.py b/docker_tests/test_prod_image.py index 46cba5fbfaaa..01e271a4071a 100644 --- a/docker_tests/test_prod_image.py +++ b/docker_tests/test_prod_image.py @@ -85,7 +85,6 @@ def test_required_providers_are_installed(self): lines = PREINSTALLED_PROVIDERS else: lines = (d.strip() for d in INSTALLED_PROVIDER_PATH.read_text().splitlines()) - lines = (d for d in lines) packages_to_install = {f"apache-airflow-providers-{d.replace('.', '-')}" for d in lines} assert len(packages_to_install) != 0 diff --git a/scripts/ci/pre_commit/common_precommit_utils.py b/scripts/ci/pre_commit/common_precommit_utils.py index 29109a4c3433..3bb4e1c4184c 100644 --- a/scripts/ci/pre_commit/common_precommit_utils.py +++ b/scripts/ci/pre_commit/common_precommit_utils.py @@ -64,8 +64,7 @@ def insert_documentation(file_path: Path, content: list[str], header: str, foote def get_directory_hash(directory: Path, skip_path_regexp: str | None = None) -> str: - files = [file for file in directory.rglob("*")] - files.sort() + files = sorted(directory.rglob("*")) if skip_path_regexp: 
matcher = re.compile(skip_path_regexp) files = [file for file in files if not matcher.match(os.fspath(file.resolve()))] diff --git a/tests/always/test_connection.py b/tests/always/test_connection.py index b367e9897262..43e7521a24e6 100644 --- a/tests/always/test_connection.py +++ b/tests/always/test_connection.py @@ -347,7 +347,7 @@ def test_connection_extra_with_encryption_rotate_fernet_key(self): ), ] - @pytest.mark.parametrize("test_config", [x for x in test_from_uri_params]) + @pytest.mark.parametrize("test_config", test_from_uri_params) def test_connection_from_uri(self, test_config: UriTestCaseConfig): connection = Connection(uri=test_config.test_uri) @@ -369,7 +369,7 @@ def test_connection_from_uri(self, test_config: UriTestCaseConfig): self.mask_secret.assert_has_calls(expected_calls) - @pytest.mark.parametrize("test_config", [x for x in test_from_uri_params]) + @pytest.mark.parametrize("test_config", test_from_uri_params) def test_connection_get_uri_from_uri(self, test_config: UriTestCaseConfig): """ This test verifies that when we create a conn_1 from URI, and we generate a URI from that conn, that @@ -390,7 +390,7 @@ def test_connection_get_uri_from_uri(self, test_config: UriTestCaseConfig): assert connection.schema == new_conn.schema assert connection.extra_dejson == new_conn.extra_dejson - @pytest.mark.parametrize("test_config", [x for x in test_from_uri_params]) + @pytest.mark.parametrize("test_config", test_from_uri_params) def test_connection_get_uri_from_conn(self, test_config: UriTestCaseConfig): """ This test verifies that if we create conn_1 from attributes (rather than from URI), and we generate a diff --git a/tests/conftest.py b/tests/conftest.py index 2b431f1772de..a652cb9cef48 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -134,7 +134,7 @@ def pytest_print(text): # It is very unlikely that the user wants to display only numbers, but probably # the user just wants to count the queries. 
exit_stack.enter_context(count_queries(print_fn=pytest_print)) - elif any(c for c in ["time", "trace", "sql", "parameters"]): + elif any(c in columns for c in ["time", "trace", "sql", "parameters"]): exit_stack.enter_context( trace_queries( display_num="num" in columns, diff --git a/tests/models/test_mappedoperator.py b/tests/models/test_mappedoperator.py index 6d4a2fbca5cf..8366acf0b7ea 100644 --- a/tests/models/test_mappedoperator.py +++ b/tests/models/test_mappedoperator.py @@ -668,7 +668,7 @@ def execute(self, context): class ConsumeXcomOperator(PushXcomOperator): def execute(self, context): - assert {i for i in self.arg1} == {1, 2, 3} + assert set(self.arg1) == {1, 2, 3} with dag_maker("test_all_xcomargs_from_mapped_tasks_are_consumable"): op1 = PushXcomOperator.partial(task_id="op1").expand(arg1=[1, 2, 3]) diff --git a/tests/models/test_skipmixin.py b/tests/models/test_skipmixin.py index 547dbec5b420..103abc3ef67f 100644 --- a/tests/models/test_skipmixin.py +++ b/tests/models/test_skipmixin.py @@ -147,7 +147,7 @@ def task_group_op(k): branch_b = EmptyOperator(task_id="branch_b") branch_op(k) >> [branch_a, branch_b] - task_group_op.expand(k=[i for i in range(2)]) + task_group_op.expand(k=[0, 1]) dag_maker.create_dagrun() branch_op_ti_0 = TI(dag.get_task("task_group_op.branch_op"), execution_date=DEFAULT_DATE, map_index=0) diff --git a/tests/providers/amazon/aws/sensors/test_eks.py b/tests/providers/amazon/aws/sensors/test_eks.py index fa5457f88958..0bb625532d9f 100644 --- a/tests/providers/amazon/aws/sensors/test_eks.py +++ b/tests/providers/amazon/aws/sensors/test_eks.py @@ -42,13 +42,9 @@ NODEGROUP_NAME = "test_nodegroup" TASK_ID = "test_eks_sensor" -CLUSTER_PENDING_STATES = frozenset(frozenset({state for state in ClusterStates}) - CLUSTER_TERMINAL_STATES) -FARGATE_PENDING_STATES = frozenset( - frozenset({state for state in FargateProfileStates}) - FARGATE_TERMINAL_STATES -) -NODEGROUP_PENDING_STATES = frozenset( - frozenset({state for state in NodegroupStates}) - NODEGROUP_TERMINAL_STATES -) +CLUSTER_PENDING_STATES = frozenset(ClusterStates) - frozenset(CLUSTER_TERMINAL_STATES) +FARGATE_PENDING_STATES = frozenset(FargateProfileStates) - frozenset(FARGATE_TERMINAL_STATES) +NODEGROUP_PENDING_STATES = frozenset(NodegroupStates) - frozenset(NODEGROUP_TERMINAL_STATES) class TestEksClusterStateSensor: diff --git a/tests/providers/google/cloud/log/test_stackdriver_task_handler.py b/tests/providers/google/cloud/log/test_stackdriver_task_handler.py index ca489efb45db..e9a79629801f 100644 --- a/tests/providers/google/cloud/log/test_stackdriver_task_handler.py +++ b/tests/providers/google/cloud/log/test_stackdriver_task_handler.py @@ -311,7 +311,7 @@ def test_should_read_logs_with_custom_resources(self, mock_client, mock_get_cred entry = mock.MagicMock(json_payload={"message": "TEXT"}) page = mock.MagicMock(entries=[entry, entry], next_page_token=None) - mock_client.return_value.list_log_entries.return_value.pages = (n for n in [page]) + mock_client.return_value.list_log_entries.return_value.pages = iter([page]) logs, metadata = stackdriver_task_handler.read(self.ti) mock_client.return_value.list_log_entries.assert_called_once_with( diff --git a/tests/providers/google/cloud/transfers/test_sql_to_gcs.py b/tests/providers/google/cloud/transfers/test_sql_to_gcs.py index dd0e5a42d639..03d6ca36b879 100644 --- a/tests/providers/google/cloud/transfers/test_sql_to_gcs.py +++ b/tests/providers/google/cloud/transfers/test_sql_to_gcs.py @@ -555,9 +555,7 @@ def 
test__write_local_data_files_csv_does_not_write_on_empty_rows(self): files = op._write_local_data_files(cursor) # Raises StopIteration when next is called because generator returns no files with pytest.raises(StopIteration): - next(files)["file_handle"] - - assert len([f for f in files]) == 0 + next(files) def test__write_local_data_files_csv_writes_empty_file_with_write_on_empty(self): op = DummySQLToGCSOperator( diff --git a/tests/providers/google/cloud/triggers/test_cloud_storage_transfer_service.py b/tests/providers/google/cloud/triggers/test_cloud_storage_transfer_service.py index a7108d69380f..ec6ed4b917f7 100644 --- a/tests/providers/google/cloud/triggers/test_cloud_storage_transfer_service.py +++ b/tests/providers/google/cloud/triggers/test_cloud_storage_transfer_service.py @@ -69,7 +69,7 @@ def mock_jobs(names: list[str], latest_operation_names: list[str | None]): for job, name in zip(jobs, names): job.name = name mock_obj = mock.MagicMock() - mock_obj.__aiter__.return_value = (job for job in jobs) + mock_obj.__aiter__.return_value = iter(jobs) return mock_obj diff --git a/tests/sensors/test_external_task_sensor.py b/tests/sensors/test_external_task_sensor.py index e84b3f69f48e..4422cbe48c87 100644 --- a/tests/sensors/test_external_task_sensor.py +++ b/tests/sensors/test_external_task_sensor.py @@ -135,7 +135,7 @@ def dummy_mapped_task(x: int): return x dummy_task() - dummy_mapped_task.expand(x=[i for i in map_indexes]) + dummy_mapped_task.expand(x=list(map_indexes)) SerializedDagModel.write_dag(dag) @@ -1089,7 +1089,7 @@ def run_tasks(dag_bag, execution_date=DEFAULT_DATE, session=None): # this is equivalent to topological sort. It would not work in general case # but it works for our case because we specifically constructed test DAGS # in the way that those two sort methods are equivalent - tasks = sorted((ti for ti in dagrun.task_instances), key=lambda ti: ti.task_id) + tasks = sorted(dagrun.task_instances, key=lambda ti: ti.task_id) for ti in tasks: ti.refresh_from_task(dag.get_task(ti.task_id)) tis[ti.task_id] = ti @@ -1478,7 +1478,7 @@ def dummy_task(x: int): mode="reschedule", ) - body = dummy_task.expand(x=[i for i in range(5)]) + body = dummy_task.expand(x=range(5)) tail = ExternalTaskMarker( task_id="tail", external_dag_id=dag.dag_id, @@ -1524,7 +1524,7 @@ def test_clear_overlapping_external_task_marker_mapped_tasks(dag_bag_head_tail_m include_downstream=True, include_upstream=False, ) - task_ids = [tid for tid in dag.task_dict] + task_ids = list(dag.task_dict) assert ( dag.clear( start_date=DEFAULT_DATE, diff --git a/tests/system/providers/amazon/aws/example_s3_to_sql.py b/tests/system/providers/amazon/aws/example_s3_to_sql.py index ee110dce4fdc..4910149961e2 100644 --- a/tests/system/providers/amazon/aws/example_s3_to_sql.py +++ b/tests/system/providers/amazon/aws/example_s3_to_sql.py @@ -177,7 +177,7 @@ def parse_csv_to_list(filepath): import csv with open(filepath, newline="") as file: - return [row for row in csv.reader(file)] + return list(csv.reader(file)) transfer_s3_to_sql = S3ToSqlOperator( task_id="transfer_s3_to_sql", From 6996d308a6ae85ae44f3f807aaf9921f9068939c Mon Sep 17 00:00:00 2001 From: Hussein Awala Date: Mon, 21 Aug 2023 11:50:12 +0200 Subject: [PATCH 047/117] Create a new method used to resume the task in order to implement specific logic for operators (#33424) * Create a generic method used to resume the task in order to implement specific logic for each operator * Provide resume_execution args * add a unit test * Fix soft fail after a trigger 
timeout * wrap the exception with a str to get the message * Apply suggestions from code review Co-authored-by: Tzu-ping Chung --------- Co-authored-by: Tzu-ping Chung (cherry picked from commit 852f85bc61c04fcf39618b8a7ee5ec885f8fde28) --- airflow/models/baseoperator.py | 17 +++++++++++++++++ airflow/models/taskinstance.py | 22 ++++++---------------- airflow/sensors/base.py | 9 +++++++++ tests/sensors/test_base.py | 34 ++++++++++++++++++++++++++++++++-- 4 files changed, 64 insertions(+), 18 deletions(-) diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index 47e6c07ee206..47b2aa504468 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -58,6 +58,7 @@ AirflowException, FailStopDagInvalidTriggerRule, RemovedInAirflow3Warning, + TaskDeferralError, TaskDeferred, ) from airflow.lineage import apply_lineage, prepare_lineage @@ -1590,6 +1591,22 @@ def defer( """ raise TaskDeferred(trigger=trigger, method_name=method_name, kwargs=kwargs, timeout=timeout) + def resume_execution(self, next_method: str, next_kwargs: dict[str, Any] | None, context: Context): + """This method is called when a deferred task is resumed.""" + # __fail__ is a special signal value for next_method that indicates + # this task was scheduled specifically to fail. + if next_method == "__fail__": + next_kwargs = next_kwargs or {} + traceback = next_kwargs.get("traceback") + if traceback is not None: + self.log.error("Trigger failed:\n%s", "\n".join(traceback)) + raise TaskDeferralError(next_kwargs.get("error", "Unknown")) + # Grab the callable off the Operator/Task and add in any kwargs + execute_callable = getattr(self, next_method) + if next_kwargs: + execute_callable = functools.partial(execute_callable, **next_kwargs) + return execute_callable(context) + def unmap(self, resolve: None | dict[str, Any] | tuple[Context, Session]) -> BaseOperator: """Get the "normal" operator from the current operator. diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index 74cc5e45ffda..975e615d9541 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -29,7 +29,6 @@ from collections import defaultdict from datetime import datetime, timedelta from enum import Enum -from functools import partial from pathlib import PurePath from types import TracebackType from typing import TYPE_CHECKING, Any, Callable, Collection, Generator, Iterable, Tuple @@ -81,7 +80,6 @@ AirflowTaskTimeout, DagRunNotFound, RemovedInAirflow3Warning, - TaskDeferralError, TaskDeferred, UnmappableXComLengthPushed, UnmappableXComTypePushed, @@ -1710,19 +1708,11 @@ def _execute_task(self, context, task_orig): # If the task has been deferred and is being executed due to a trigger, # then we need to pick the right method to come back to, otherwise # we go for the default execute + execute_callable_kwargs = {} if self.next_method: - # __fail__ is a special signal value for next_method that indicates - # this task was scheduled specifically to fail. 
- if self.next_method == "__fail__": - next_kwargs = self.next_kwargs or {} - traceback = self.next_kwargs.get("traceback") - if traceback is not None: - self.log.error("Trigger failed:\n%s", "\n".join(traceback)) - raise TaskDeferralError(next_kwargs.get("error", "Unknown")) - # Grab the callable off the Operator/Task and add in any kwargs - execute_callable = getattr(task_to_execute, self.next_method) - if self.next_kwargs: - execute_callable = partial(execute_callable, **self.next_kwargs) + execute_callable = task_to_execute.resume_execution + execute_callable_kwargs["next_method"] = self.next_method + execute_callable_kwargs["next_kwargs"] = self.next_kwargs else: execute_callable = task_to_execute.execute # If a timeout is specified for the task, make it fail @@ -1742,12 +1732,12 @@ def _execute_task(self, context, task_orig): raise AirflowTaskTimeout() # Run task in timeout wrapper with timeout(timeout_seconds): - result = execute_callable(context=context) + result = execute_callable(context=context, **execute_callable_kwargs) except AirflowTaskTimeout: task_to_execute.on_kill() raise else: - result = execute_callable(context=context) + result = execute_callable(context=context, **execute_callable_kwargs) with create_session() as session: if task_to_execute.do_xcom_push: xcom_value = result diff --git a/airflow/sensors/base.py b/airflow/sensors/base.py index 792d907d1f50..3f8b6bf2e6bc 100644 --- a/airflow/sensors/base.py +++ b/airflow/sensors/base.py @@ -35,6 +35,7 @@ AirflowSensorTimeout, AirflowSkipException, AirflowTaskTimeout, + TaskDeferralError, ) from airflow.executors.executor_loader import ExecutorLoader from airflow.models.baseoperator import BaseOperator @@ -281,6 +282,14 @@ def run_duration() -> float: self.log.info("Success criteria met. 
Exiting.") return xcom_value + def resume_execution(self, next_method: str, next_kwargs: dict[str, Any] | None, context: Context): + try: + return super().resume_execution(next_method, next_kwargs, context) + except (AirflowException, TaskDeferralError) as e: + if self.soft_fail: + raise AirflowSkipException(str(e)) from e + raise + def _get_next_poke_interval( self, started_at: datetime.datetime | float, diff --git a/tests/sensors/test_base.py b/tests/sensors/test_base.py index e4e6ac5ad5d5..4dff8222e200 100644 --- a/tests/sensors/test_base.py +++ b/tests/sensors/test_base.py @@ -23,7 +23,12 @@ import pytest import time_machine -from airflow.exceptions import AirflowException, AirflowRescheduleException, AirflowSensorTimeout +from airflow.exceptions import ( + AirflowException, + AirflowRescheduleException, + AirflowSensorTimeout, + AirflowSkipException, +) from airflow.executors.debug_executor import DebugExecutor from airflow.executors.executor_constants import ( CELERY_EXECUTOR, @@ -37,7 +42,7 @@ ) from airflow.executors.local_executor import LocalExecutor from airflow.executors.sequential_executor import SequentialExecutor -from airflow.models import TaskReschedule +from airflow.models import TaskInstance, TaskReschedule from airflow.models.xcom import XCom from airflow.operators.empty import EmptyOperator from airflow.providers.celery.executors.celery_executor import CeleryExecutor @@ -70,6 +75,15 @@ def poke(self, context: Context): return self.return_value +class DummyAsyncSensor(BaseSensorOperator): + def __init__(self, return_value=False, **kwargs): + super().__init__(**kwargs) + self.return_value = return_value + + def execute_complete(self, context, event=None): + raise AirflowException("Should be skipped") + + class DummySensorWithXcomValue(BaseSensorOperator): def __init__(self, return_value=False, xcom_value=None, **kwargs): super().__init__(**kwargs) @@ -910,3 +924,19 @@ def test_poke_mode_only_bad_poke(self): sensor = DummyPokeOnlySensor(task_id="foo", mode="poke", poke_changes_mode=True) with pytest.raises(ValueError, match="Cannot set mode to 'reschedule'. 
Only 'poke' is acceptable"): sensor.poke({}) + + +class TestAsyncSensor: + @pytest.mark.parametrize( + "soft_fail, expected_exception", + [ + (True, AirflowSkipException), + (False, AirflowException), + ], + ) + def test_fail_after_resuming_deffered_sensor(self, soft_fail, expected_exception): + async_sensor = DummyAsyncSensor(task_id="dummy_async_sensor", soft_fail=soft_fail) + ti = TaskInstance(task=async_sensor) + ti.next_method = "execute_complete" + with pytest.raises(expected_exception): + ti._execute_task({}, None) From dfe129f77c4232a604a94fde790c94e88e7c7363 Mon Sep 17 00:00:00 2001 From: Wei Lee Date: Sat, 26 Aug 2023 02:11:00 +0800 Subject: [PATCH 048/117] Respect "soft_fail" argument when "poke" is called (#33401) * feat(sensors/base): raise AirflowSkipException if soft_fail is set to True and exception occurs after running poke() * test(sensor/base): add test case for respecting soft_fail option when other kinds of exception is raised (cherry picked from commit d91c481ac9051316d0e61b02eda5a7e21bb6ac5b) --- airflow/sensors/base.py | 7 ++++++- tests/sensors/test_base.py | 30 +++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/airflow/sensors/base.py b/airflow/sensors/base.py index 3f8b6bf2e6bc..d737a2cb5d49 100644 --- a/airflow/sensors/base.py +++ b/airflow/sensors/base.py @@ -240,14 +240,19 @@ def run_duration() -> float: except ( AirflowSensorTimeout, AirflowTaskTimeout, - AirflowSkipException, AirflowFailException, ) as e: + if self.soft_fail: + raise AirflowSkipException("Skipping due to soft_fail is set to True.") from e + raise e + except AirflowSkipException as e: raise e except Exception as e: if self.silent_fail: logging.error("Sensor poke failed: \n %s", traceback.format_exc()) poke_return = False + elif self.soft_fail: + raise AirflowSkipException("Skipping due to soft_fail is set to True.") from e else: raise e diff --git a/tests/sensors/test_base.py b/tests/sensors/test_base.py index 4dff8222e200..2dbd5cc68641 100644 --- a/tests/sensors/test_base.py +++ b/tests/sensors/test_base.py @@ -25,9 +25,11 @@ from airflow.exceptions import ( AirflowException, + AirflowFailException, AirflowRescheduleException, AirflowSensorTimeout, AirflowSkipException, + AirflowTaskTimeout, ) from airflow.executors.debug_executor import DebugExecutor from airflow.executors.executor_constants import ( @@ -48,9 +50,7 @@ from airflow.providers.celery.executors.celery_executor import CeleryExecutor from airflow.providers.celery.executors.celery_kubernetes_executor import CeleryKubernetesExecutor from airflow.providers.cncf.kubernetes.executors.kubernetes_executor import KubernetesExecutor -from airflow.providers.cncf.kubernetes.executors.local_kubernetes_executor import ( - LocalKubernetesExecutor, -) +from airflow.providers.cncf.kubernetes.executors.local_kubernetes_executor import LocalKubernetesExecutor from airflow.sensors.base import BaseSensorOperator, PokeReturnValue, poke_mode_only from airflow.ti_deps.deps.ready_to_reschedule import ReadyToRescheduleDep from airflow.utils import timezone @@ -176,6 +176,28 @@ def test_soft_fail(self, make_sensor): if ti.task_id == DUMMY_OP: assert ti.state == State.NONE + @pytest.mark.parametrize( + "exception_cls", + ( + AirflowSensorTimeout, + AirflowTaskTimeout, + AirflowFailException, + Exception, + ), + ) + def test_soft_fail_with_non_skip_exception(self, make_sensor, exception_cls): + sensor, dr = make_sensor(False, soft_fail=True) + sensor.poke = Mock(side_effect=[exception_cls(None)]) + + 
self._run(sensor) + tis = dr.get_task_instances() + assert len(tis) == 2 + for ti in tis: + if ti.task_id == SENSOR_OP: + assert ti.state == State.SKIPPED + if ti.task_id == DUMMY_OP: + assert ti.state == State.NONE + def test_soft_fail_with_retries(self, make_sensor): sensor, dr = make_sensor( return_value=False, soft_fail=True, retries=1, retry_delay=timedelta(milliseconds=1) @@ -518,7 +540,6 @@ def run_duration(): assert sensor._get_next_poke_interval(started_at, run_duration, 2) == sensor.poke_interval def test_sensor_with_exponential_backoff_on(self): - sensor = DummySensor( task_id=SENSOR_OP, return_value=None, poke_interval=5, timeout=60, exponential_backoff=True ) @@ -575,7 +596,6 @@ def run_duration(): assert intervals[0] == intervals[-1] def test_sensor_with_exponential_backoff_on_and_max_wait(self): - sensor = DummySensor( task_id=SENSOR_OP, return_value=None, From a597d5eb8bbcfcde2e607a2821b64bc917c1fde3 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Fri, 18 Aug 2023 14:22:28 +0800 Subject: [PATCH 049/117] Allow timetable to slightly miss catchup cutoff (#33404) (cherry picked from commit a6299d47eac9cba23d54e5b1f3ff589e76871eae) --- airflow/timetables/trigger.py | 2 +- newsfragments/33404.significant.rst | 16 ++++++++++++++++ tests/timetables/test_trigger_timetable.py | 18 +++++++++++++++--- 3 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 newsfragments/33404.significant.rst diff --git a/airflow/timetables/trigger.py b/airflow/timetables/trigger.py index 7807542da5a6..e5c5b7c15304 100644 --- a/airflow/timetables/trigger.py +++ b/airflow/timetables/trigger.py @@ -89,7 +89,7 @@ def next_dagrun_info( else: next_start_time = self._align_to_next(restriction.earliest) else: - start_time_candidates = [self._align_to_next(DateTime.utcnow())] + start_time_candidates = [self._align_to_prev(DateTime.utcnow())] if last_automated_data_interval is not None: start_time_candidates.append(self._get_next(last_automated_data_interval.end)) if restriction.earliest is not None: diff --git a/newsfragments/33404.significant.rst b/newsfragments/33404.significant.rst new file mode 100644 index 000000000000..e0f14168cbc1 --- /dev/null +++ b/newsfragments/33404.significant.rst @@ -0,0 +1,16 @@ +CronTriggerTimetable is now less aggressive when trying to skip a run + +When setting ``catchup=False``, CronTriggerTimetable no longer skips a run if +the scheduler does not query the timetable immediately after the previous run +has been triggered. + +This should not affect scheduling in most cases, but can change the behaviour if +a DAG is paused-unpaused to manually skip a run. Previously, the timetable (with +``catchup=False``) would only start a run after a DAG is unpaused, but with this +change, the scheduler would try to look at little bit back to schedule the +previous run that covers a part of the period when the DAG was paused. This +means you will need to keep a DAG paused longer (namely, for the entire cron +period to pass) to really skip a run. + +Note that this is also the behaviour exhibited by various other cron-based +scheduling tools, such as anacron. 
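As a rough illustration of the behaviour described in the newsfragment above, a minimal DAG sketch could look like the following (the DAG id, dates, and task are made up for illustration; only the ``CronTriggerTimetable`` import path comes from the change itself):

    import pendulum

    from airflow.models.dag import DAG
    from airflow.operators.empty import EmptyOperator
    from airflow.timetables.trigger import CronTriggerTimetable

    with DAG(
        dag_id="cron_trigger_catchup_false_sketch",  # hypothetical DAG id
        start_date=pendulum.datetime(2023, 1, 1, tz="UTC"),
        schedule=CronTriggerTimetable("0 1 * * *", timezone="UTC"),
        catchup=False,
    ) as dag:
        # With the relaxed cutoff, a scheduler that evaluates the timetable shortly
        # after 01:00 UTC still schedules the 01:00 run instead of waiting a full day.
        EmptyOperator(task_id="noop")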
diff --git a/tests/timetables/test_trigger_timetable.py b/tests/timetables/test_trigger_timetable.py index 6f1d44479f1b..58e1f49df905 100644 --- a/tests/timetables/test_trigger_timetable.py +++ b/tests/timetables/test_trigger_timetable.py @@ -48,11 +48,11 @@ [ pytest.param( None, - CURRENT_TIME + DELTA_FROM_MIDNIGHT, + YESTERDAY + DELTA_FROM_MIDNIGHT, id="first-run", ), pytest.param( - PREV_DATA_INTERVAL_EXACT, + DataInterval.exact(YESTERDAY + DELTA_FROM_MIDNIGHT), CURRENT_TIME + DELTA_FROM_MIDNIGHT, id="before-now", ), @@ -89,9 +89,21 @@ def test_daily_cron_trigger_no_catchup_first_starts_at_next_schedule( pytest.param( pendulum.DateTime(2022, 7, 27, 0, 30, 0, tzinfo=TIMEZONE), START_DATE, - DagRunInfo.exact(pendulum.DateTime(2022, 7, 27, 1, 0, 0, tzinfo=TIMEZONE)), + DagRunInfo.exact(pendulum.DateTime(2022, 7, 27, 0, 0, 0, tzinfo=TIMEZONE)), id="current_time_not_on_boundary", ), + pytest.param( + pendulum.DateTime(2022, 7, 27, 1, 0, 0, tzinfo=TIMEZONE), + START_DATE, + DagRunInfo.exact(pendulum.DateTime(2022, 7, 27, 1, 0, 0, tzinfo=TIMEZONE)), + id="current_time_miss_one_interval_on_boundary", + ), + pytest.param( + pendulum.DateTime(2022, 7, 27, 1, 30, 0, tzinfo=TIMEZONE), + START_DATE, + DagRunInfo.exact(pendulum.DateTime(2022, 7, 27, 1, 0, 0, tzinfo=TIMEZONE)), + id="current_time_miss_one_interval_not_on_boundary", + ), pytest.param( pendulum.DateTime(2022, 7, 27, 0, 30, 0, tzinfo=TIMEZONE), pendulum.DateTime(2199, 12, 31, 22, 30, 0, tzinfo=TIMEZONE), From 0fbb01b5b9848535d2c3973099391b6adb9eef74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Wed, 16 Aug 2023 17:45:18 +0000 Subject: [PATCH 050/117] Import utc from datetime and normalize its import (#33450) (cherry picked from commit bfe08a79db8130c499883f014121be570ec071bd) --- airflow/example_dags/example_sensors.py | 19 +++++++++++-------- .../providers/cncf/kubernetes/triggers/pod.py | 7 +++---- .../google/cloud/operators/workflows.py | 11 ++++++----- .../providers/amazon/aws/utils/test_utils.py | 6 ++---- .../cncf/kubernetes/triggers/test_pod.py | 5 ++--- .../google/cloud/operators/test_workflows.py | 9 ++++++--- .../cloud/transfers/test_postgres_to_gcs.py | 3 +-- .../cloud/triggers/test_kubernetes_engine.py | 5 ++--- 8 files changed, 33 insertions(+), 32 deletions(-) diff --git a/airflow/example_dags/example_sensors.py b/airflow/example_dags/example_sensors.py index d9e3158f544b..a79b61ceed01 100644 --- a/airflow/example_dags/example_sensors.py +++ b/airflow/example_dags/example_sensors.py @@ -17,10 +17,9 @@ from __future__ import annotations -from datetime import datetime, timedelta +import datetime import pendulum -from pytz import UTC from airflow.models import DAG from airflow.operators.bash import BashOperator @@ -54,32 +53,36 @@ def failure_callable(): tags=["example"], ) as dag: # [START example_time_delta_sensor] - t0 = TimeDeltaSensor(task_id="wait_some_seconds", delta=timedelta(seconds=2)) + t0 = TimeDeltaSensor(task_id="wait_some_seconds", delta=datetime.timedelta(seconds=2)) # [END example_time_delta_sensor] # [START example_time_delta_sensor_async] - t0a = TimeDeltaSensorAsync(task_id="wait_some_seconds_async", delta=timedelta(seconds=2)) + t0a = TimeDeltaSensorAsync(task_id="wait_some_seconds_async", delta=datetime.timedelta(seconds=2)) # [END example_time_delta_sensor_async] # [START example_time_sensors] - t1 = TimeSensor(task_id="fire_immediately", target_time=datetime.now(tz=UTC).time()) + t1 = TimeSensor( + task_id="fire_immediately", 
target_time=datetime.datetime.now(tz=datetime.timezone.utc).time() + ) t2 = TimeSensor( task_id="timeout_after_second_date_in_the_future", timeout=1, soft_fail=True, - target_time=(datetime.now(tz=UTC) + timedelta(hours=1)).time(), + target_time=(datetime.datetime.now(tz=datetime.timezone.utc) + datetime.timedelta(hours=1)).time(), ) # [END example_time_sensors] # [START example_time_sensors_async] - t1a = TimeSensorAsync(task_id="fire_immediately_async", target_time=datetime.now(tz=UTC).time()) + t1a = TimeSensorAsync( + task_id="fire_immediately_async", target_time=datetime.datetime.now(tz=datetime.timezone.utc).time() + ) t2a = TimeSensorAsync( task_id="timeout_after_second_date_in_the_future_async", timeout=1, soft_fail=True, - target_time=(datetime.now(tz=UTC) + timedelta(hours=1)).time(), + target_time=(datetime.datetime.now(tz=datetime.timezone.utc) + datetime.timedelta(hours=1)).time(), ) # [END example_time_sensors_async] diff --git a/airflow/providers/cncf/kubernetes/triggers/pod.py b/airflow/providers/cncf/kubernetes/triggers/pod.py index 6443dfb63f3c..f9ab8c268b81 100644 --- a/airflow/providers/cncf/kubernetes/triggers/pod.py +++ b/airflow/providers/cncf/kubernetes/triggers/pod.py @@ -17,13 +17,12 @@ from __future__ import annotations import asyncio +import datetime import warnings from asyncio import CancelledError -from datetime import datetime from enum import Enum from typing import Any, AsyncIterator -import pytz from kubernetes_asyncio.client.models import V1Pod from airflow.exceptions import AirflowProviderDeprecationWarning @@ -74,7 +73,7 @@ def __init__( self, pod_name: str, pod_namespace: str, - trigger_start_time: datetime, + trigger_start_time: datetime.datetime, base_container_name: str, kubernetes_conn_id: str | None = None, poll_interval: float = 2, @@ -167,7 +166,7 @@ async def run(self) -> AsyncIterator[TriggerEvent]: # type: ignore[override] self.log.info("Container is not completed and still working.") if pod_status == PodPhase.PENDING and container_state == ContainerState.UNDEFINED: - delta = datetime.now(tz=pytz.UTC) - self.trigger_start_time + delta = datetime.datetime.now(tz=datetime.timezone.utc) - self.trigger_start_time if delta.total_seconds() >= self.startup_timeout: message = ( f"Pod took longer than {self.startup_timeout} seconds to start. " diff --git a/airflow/providers/google/cloud/operators/workflows.py b/airflow/providers/google/cloud/operators/workflows.py index 4f2517f5da01..2887bd135846 100644 --- a/airflow/providers/google/cloud/operators/workflows.py +++ b/airflow/providers/google/cloud/operators/workflows.py @@ -16,13 +16,12 @@ # under the License. 
from __future__ import annotations +import datetime import json import re import uuid -from datetime import datetime, timedelta from typing import TYPE_CHECKING, Sequence -import pytz from google.api_core.exceptions import AlreadyExists from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault from google.api_core.retry import Retry @@ -624,7 +623,7 @@ def __init__( *, workflow_id: str, location: str, - start_date_filter: datetime | None = None, + start_date_filter: datetime.datetime | None = None, project_id: str | None = None, retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, @@ -637,7 +636,9 @@ def __init__( self.workflow_id = workflow_id self.location = location - self.start_date_filter = start_date_filter or datetime.now(tz=pytz.UTC) - timedelta(minutes=60) + self.start_date_filter = start_date_filter or datetime.datetime.now( + tz=datetime.timezone.utc + ) - datetime.timedelta(minutes=60) self.project_id = project_id self.retry = retry self.timeout = timeout @@ -668,7 +669,7 @@ def execute(self, context: Context): return [ Execution.to_dict(e) for e in execution_iter - if e.start_time.ToDatetime(tzinfo=pytz.UTC) > self.start_date_filter + if e.start_time.ToDatetime(tzinfo=datetime.timezone.utc) > self.start_date_filter ] diff --git a/tests/providers/amazon/aws/utils/test_utils.py b/tests/providers/amazon/aws/utils/test_utils.py index 66d5f734dc7d..7f951d06dddd 100644 --- a/tests/providers/amazon/aws/utils/test_utils.py +++ b/tests/providers/amazon/aws/utils/test_utils.py @@ -16,9 +16,7 @@ # under the License. from __future__ import annotations -from datetime import datetime - -import pytz +import datetime from airflow.providers.amazon.aws.utils import ( _StringCompareEnum, @@ -28,7 +26,7 @@ get_airflow_version, ) -DT = datetime(2000, 1, 1, tzinfo=pytz.UTC) +DT = datetime.datetime(2000, 1, 1, tzinfo=datetime.timezone.utc) EPOCH = 946_684_800 diff --git a/tests/providers/cncf/kubernetes/triggers/test_pod.py b/tests/providers/cncf/kubernetes/triggers/test_pod.py index fbfff17278c7..5719dcefcac3 100644 --- a/tests/providers/cncf/kubernetes/triggers/test_pod.py +++ b/tests/providers/cncf/kubernetes/triggers/test_pod.py @@ -18,13 +18,12 @@ from __future__ import annotations import asyncio +import datetime import logging from asyncio import CancelledError, Future -from datetime import datetime from unittest import mock import pytest -import pytz from kubernetes.client import models as k8s from airflow.providers.cncf.kubernetes.triggers.pod import ContainerState, KubernetesPodTrigger @@ -41,7 +40,7 @@ IN_CLUSTER = False GET_LOGS = True STARTUP_TIMEOUT_SECS = 120 -TRIGGER_START_TIME = datetime.now(tz=pytz.UTC) +TRIGGER_START_TIME = datetime.datetime.now(tz=datetime.timezone.utc) FAILED_RESULT_MSG = "Test message that appears when trigger have failed event." 
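# A minimal sketch (not part of this diff) of the substitution applied throughout
# this commit: the standard library's datetime.timezone.utc replaces pytz.UTC when
# building timezone-aware values, so the separate pytz import can be dropped.
import datetime

now_utc = datetime.datetime.now(tz=datetime.timezone.utc)  # previously: datetime.now(tz=pytz.UTC)
fixed_utc = datetime.datetime(2000, 1, 1, tzinfo=datetime.timezone.utc)  # previously: tzinfo=pytz.UTC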
BASE_CONTAINER_NAME = "base" ON_FINISH_ACTION = "delete_pod" diff --git a/tests/providers/google/cloud/operators/test_workflows.py b/tests/providers/google/cloud/operators/test_workflows.py index ad202fa5929d..7ecd1627fe20 100644 --- a/tests/providers/google/cloud/operators/test_workflows.py +++ b/tests/providers/google/cloud/operators/test_workflows.py @@ -19,7 +19,6 @@ import datetime from unittest import mock -import pytz from google.protobuf.timestamp_pb2 import Timestamp from airflow.providers.google.cloud.operators.workflows import ( @@ -171,7 +170,9 @@ class TestWorkflowsListWorkflowsOperator: @mock.patch(BASE_PATH.format("WorkflowsHook")) def test_execute(self, mock_hook, mock_object): timestamp = Timestamp() - timestamp.FromDatetime(datetime.datetime.now(tz=pytz.UTC) + datetime.timedelta(minutes=5)) + timestamp.FromDatetime( + datetime.datetime.now(tz=datetime.timezone.utc) + datetime.timedelta(minutes=5) + ) workflow_mock = mock.MagicMock() workflow_mock.start_time = timestamp mock_hook.return_value.list_workflows.return_value = [workflow_mock] @@ -334,7 +335,9 @@ class TestWorkflowExecutionsListExecutionsOperator: @mock.patch(BASE_PATH.format("WorkflowsHook")) def test_execute(self, mock_hook, mock_object): timestamp = Timestamp() - timestamp.FromDatetime(datetime.datetime.now(tz=pytz.UTC) + datetime.timedelta(minutes=5)) + timestamp.FromDatetime( + datetime.datetime.now(tz=datetime.timezone.utc) + datetime.timedelta(minutes=5) + ) execution_mock = mock.MagicMock() execution_mock.start_time = timestamp mock_hook.return_value.list_executions.return_value = [execution_mock] diff --git a/tests/providers/google/cloud/transfers/test_postgres_to_gcs.py b/tests/providers/google/cloud/transfers/test_postgres_to_gcs.py index 4d9934f188dc..8d0660b8faa4 100644 --- a/tests/providers/google/cloud/transfers/test_postgres_to_gcs.py +++ b/tests/providers/google/cloud/transfers/test_postgres_to_gcs.py @@ -21,7 +21,6 @@ from unittest.mock import patch import pytest -import pytz from airflow.providers.google.cloud.transfers.postgres_to_gcs import PostgresToGCSOperator from airflow.providers.postgres.hooks.postgres import PostgresHook @@ -110,7 +109,7 @@ def _assert_uploaded_file_content(self, bucket, obj, tmp_filename, mime_type, gz (datetime.date(1000, 1, 2), "1000-01-02"), (datetime.datetime(1970, 1, 1, 1, 0, tzinfo=None), "1970-01-01T01:00:00"), ( - datetime.datetime(2022, 1, 1, 2, 0, tzinfo=pytz.UTC), + datetime.datetime(2022, 1, 1, 2, 0, tzinfo=datetime.timezone.utc), 1641002400.0, ), (datetime.time(hour=0, minute=0, second=0), "0:00:00"), diff --git a/tests/providers/google/cloud/triggers/test_kubernetes_engine.py b/tests/providers/google/cloud/triggers/test_kubernetes_engine.py index e695822d3863..6a781696257b 100644 --- a/tests/providers/google/cloud/triggers/test_kubernetes_engine.py +++ b/tests/providers/google/cloud/triggers/test_kubernetes_engine.py @@ -18,13 +18,12 @@ from __future__ import annotations import asyncio +import datetime import logging from asyncio import CancelledError, Future -from datetime import datetime from unittest import mock import pytest -import pytz from google.cloud.container_v1.types import Operation from kubernetes.client import models as k8s @@ -43,7 +42,7 @@ SHOULD_DELETE_POD = True GET_LOGS = True STARTUP_TIMEOUT_SECS = 120 -TRIGGER_START_TIME = datetime.now(tz=pytz.UTC) +TRIGGER_START_TIME = datetime.datetime.now(tz=datetime.timezone.utc) CLUSTER_URL = "https://test-host" SSL_CA_CERT = "TEST_SSL_CA_CERT_CONTENT" FAILED_RESULT_MSG = "Test message that 
appears when trigger have failed event." From 24b11c9950e994dc6ee7e893bb2f30b5b04c8c26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vandon?= Date: Mon, 21 Aug 2023 14:14:33 -0700 Subject: [PATCH 051/117] make `conf.set` case insensitive (#33452) * make `conf.set` case insensitive `conf.get` is insensitive (it converts section and key to lower case) but set is not, which can lead to surprising behavior (see the test, which is not passing without the fix). I suggest that we override set as well to fix that. Any value that was set before with upper case was unreacheable. * fix remove_option as well * away with the str() * add significant change newsfragment (cherry picked from commit abbd5677bab4a84b1d35e7723c7dfbb155ca9144) --- airflow/configuration.py | 16 ++++++++++++++-- newsfragments/33452.significant.rst | 11 +++++++++++ tests/core/test_configuration.py | 7 +++++++ 3 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 newsfragments/33452.significant.rst diff --git a/airflow/configuration.py b/airflow/configuration.py index 22cb7fa04432..956d7081ad91 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -950,8 +950,8 @@ def get( # type: ignore[override,misc] _extra_stacklevel: int = 0, **kwargs, ) -> str | None: - section = str(section).lower() - key = str(key).lower() + section = section.lower() + key = key.lower() warning_emitted = False deprecated_section: str | None deprecated_key: str | None @@ -1307,6 +1307,16 @@ def has_option(self, section: str, option: str) -> bool: except (NoOptionError, NoSectionError): return False + def set(self, section: str, option: str, value: str | None = None) -> None: + """ + Set an option to the given value. + + This override just makes sure the section and option are lower case, to match what we do in `get`. + """ + section = section.lower() + option = option.lower() + super().set(section, option, value) + def remove_option(self, section: str, option: str, remove_default: bool = True): """ Remove an option if it exists in config from a file or default config. @@ -1314,6 +1324,8 @@ def remove_option(self, section: str, option: str, remove_default: bool = True): If both of config have the same option, this removes the option in both configs unless remove_default=False. """ + section = section.lower() + option = option.lower() if super().has_option(section, option): super().remove_option(section, option) diff --git a/newsfragments/33452.significant.rst b/newsfragments/33452.significant.rst new file mode 100644 index 000000000000..59a5d485ba13 --- /dev/null +++ b/newsfragments/33452.significant.rst @@ -0,0 +1,11 @@ +``conf.set()`` becomes case insensitive to match ``conf.get()`` behavior. Also, ``conf.get()`` will now break if used with non-string parameters. + +``conf.set(section, key, value)`` used to be case sensitive, i.e. ``conf.set("SECTION", "KEY", value)`` +and ``conf.set("section", "key", value)`` were stored as two distinct configurations. +This was inconsistent with the behavior of ``conf.get(section, key)``, which was always converting the section and key to lower case. + +As a result, configuration options set with upper case characters in the section or key were unreachable. +That's why we are now converting section and key to lower case in ``conf.set`` too. + +We also changed a bit the behavior of ``conf.get()``. It used to allow objects that are not strings in the section or key. +Doing this will now result in an exception. 
For instance, ``conf.get("section", 123)`` needs to be replaced with ``conf.get("section", "123")``. diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py index 3a24d33111aa..6d6641a73e05 100644 --- a/tests/core/test_configuration.py +++ b/tests/core/test_configuration.py @@ -113,6 +113,13 @@ def test_case_sensitivity(self): assert conf.get("core", "PERCENT") == "with%inside" assert conf.get("CORE", "PERCENT") == "with%inside" + @conf_vars({("core", "key"): "test_value"}) + def test_set_and_get_with_upper_case(self): + # both get and set should be case insensitive + assert conf.get("Core", "Key") == "test_value" + conf.set("Core", "Key", "new_test_value") + assert conf.get("Core", "Key") == "new_test_value" + def test_config_as_dict(self): """Test that getting config as dict works even if environment has non-legal env vars""" From 5930e189088f7df4b155c9b5fad9637de18ba815 Mon Sep 17 00:00:00 2001 From: Pierre Jeambrun Date: Wed, 16 Aug 2023 23:36:22 +0200 Subject: [PATCH 052/117] Fix Cluster Activity Health margin (#33456) (cherry picked from commit 2ae6d4bf5d5dab070d806a22071b23b4f40946f3) --- airflow/www/static/js/cluster-activity/live-metrics/Health.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/airflow/www/static/js/cluster-activity/live-metrics/Health.tsx b/airflow/www/static/js/cluster-activity/live-metrics/Health.tsx index 49bafaeac042..438ce24c2100 100644 --- a/airflow/www/static/js/cluster-activity/live-metrics/Health.tsx +++ b/airflow/www/static/js/cluster-activity/live-metrics/Health.tsx @@ -105,6 +105,7 @@ const Health = (props: CenterProps) => { title="Triggerer" status={data?.triggerer?.status} latestHeartbeat={data?.triggerer?.latestTriggererHeartbeat} + mb={3} /> Date: Tue, 22 Aug 2023 21:03:34 +0200 Subject: [PATCH 053/117] Revert "respect soft_fail argument when ExternalTaskSensor runs in deferrable mode (#33196)" (#33458) This reverts commit a1b5bdb25a6f9565ac5934a9a458e9b079ccf3ae. (cherry picked from commit 85a567790456732747ddccd37f690d7f3a56d2ba) --- airflow/sensors/base.py | 6 ----- airflow/sensors/external_task.py | 46 ++++++++++++++------------------ 2 files changed, 20 insertions(+), 32 deletions(-) diff --git a/airflow/sensors/base.py b/airflow/sensors/base.py index d737a2cb5d49..719bc26a1e5c 100644 --- a/airflow/sensors/base.py +++ b/airflow/sensors/base.py @@ -344,12 +344,6 @@ def reschedule(self): def get_serialized_fields(cls): return super().get_serialized_fields() | {"reschedule"} - def raise_failed_or_skiping_exception(self, *, failed_message: str, skipping_message: str = "") -> None: - """Raise AirflowSkipException if self.soft_fail is set to True. Otherwise raise AirflowException.""" - if self.soft_fail: - raise AirflowSkipException(skipping_message or failed_message) - raise AirflowException(failed_message) - def poke_mode_only(cls): """ diff --git a/airflow/sensors/external_task.py b/airflow/sensors/external_task.py index ffc2cc1313f0..92562589b212 100644 --- a/airflow/sensors/external_task.py +++ b/airflow/sensors/external_task.py @@ -222,8 +222,6 @@ def __init__( self.deferrable = deferrable self.poll_interval = poll_interval - self._skipping_message_postfix = " Skipping due to soft_fail." - def _get_dttm_filter(self, context): if self.execution_delta: dttm = context["logical_date"] - self.execution_delta @@ -276,28 +274,32 @@ def poke(self, context: Context, session: Session = NEW_SESSION) -> bool: # Fail if anything in the list has failed. 
if count_failed > 0: if self.external_task_ids: - failed_message = ( + if self.soft_fail: + raise AirflowSkipException( + f"Some of the external tasks {self.external_task_ids} " + f"in DAG {self.external_dag_id} failed. Skipping due to soft_fail." + ) + raise AirflowException( f"Some of the external tasks {self.external_task_ids} " f"in DAG {self.external_dag_id} failed." ) - - self.raise_failed_or_skiping_exception( - failed_message=failed_message, - skipping_message=f"{failed_message}{self._skipping_message_postfix}", - ) elif self.external_task_group_id: - self.raise_failed_or_skiping_exception( - failed_message=( + if self.soft_fail: + raise AirflowSkipException( f"The external task_group '{self.external_task_group_id}' " - f"in DAG '{self.external_dag_id}' failed." + f"in DAG '{self.external_dag_id}' failed. Skipping due to soft_fail." ) + raise AirflowException( + f"The external task_group '{self.external_task_group_id}' " + f"in DAG '{self.external_dag_id}' failed." ) + else: - failed_message = f"The external DAG {self.external_dag_id} failed." - self.raise_failed_or_skiping_exception( - failed_message=failed_message, - skipping_message=f"{failed_message}{self._skipping_message_postfix}", - ) + if self.soft_fail: + raise AirflowSkipException( + f"The external DAG {self.external_dag_id} failed. Skipping due to soft_fail." + ) + raise AirflowException(f"The external DAG {self.external_dag_id} failed.") count_skipped = -1 if self.skipped_states: @@ -349,20 +351,12 @@ def execute_complete(self, context, event=None): self.log.info("External task %s has executed successfully.", self.external_task_id) return None elif event["status"] == "timeout": - failed_message = "Dag was not started within 1 minute, assuming fail." - self.raise_failed_or_skiping_exception( - failed_message=failed_message, - skipping_message=f"{failed_message}{self._skipping_message_postfix}", - ) + raise AirflowException("Dag was not started within 1 minute, assuming fail.") else: - failed_message = ( + raise AirflowException( "Error occurred while trying to retrieve task status. Please, check the " "name of executed task and Dag." ) - self.raise_failed_or_skiping_exception( - failed_message=failed_message, - skipping_message=f"{failed_message}{self._skipping_message_postfix}", - ) def _check_for_existence(self, session) -> None: dag_to_wait = DagModel.get_current(self.external_dag_id, session) From 00fe5465a13affb6428c8d0e7bfa7af339c7fb80 Mon Sep 17 00:00:00 2001 From: "D. 
Ferruzzi" Date: Thu, 17 Aug 2023 05:14:26 -0700 Subject: [PATCH 054/117] Static check fixes (#33462) (cherry picked from commit 5ee1bcbef992102b3957db39a90254dfc9c4adeb) --- airflow/models/dag.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airflow/models/dag.py b/airflow/models/dag.py index beb7fe013152..75fee04145de 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -575,7 +575,7 @@ def __init__( template_searchpath = [template_searchpath] self.template_searchpath = template_searchpath self.template_undefined = template_undefined - self.last_loaded = timezone.utcnow() + self.last_loaded: datetime = timezone.utcnow() self.safe_dag_id = dag_id.replace(".", "__dot__") self.max_active_runs = max_active_runs if self.timetable.active_runs_limit is not None: @@ -620,8 +620,8 @@ def __init__( # To keep it in parity with Serialized DAGs # and identify if DAG has on_*_callback without actually storing them in Serialized JSON - self.has_on_success_callback = self.on_success_callback is not None - self.has_on_failure_callback = self.on_failure_callback is not None + self.has_on_success_callback: bool = self.on_success_callback is not None + self.has_on_failure_callback: bool = self.on_failure_callback is not None self._access_control = DAG._upgrade_outdated_dag_access_control(access_control) self.is_paused_upon_creation = is_paused_upon_creation From ab1fd50ef0a84b4be2c82324a631c689ddc310fd Mon Sep 17 00:00:00 2001 From: Aakcht Date: Thu, 17 Aug 2023 23:53:24 +0300 Subject: [PATCH 055/117] Fix secrets backend docs (#33471) (cherry picked from commit 996d8c5c930e6e7f800c0c7a5c0b1ade7516c89f) --- docs/apache-airflow/public-airflow-interface.rst | 4 ++-- .../apache-airflow/security/secrets/secrets-backend/index.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/apache-airflow/public-airflow-interface.rst b/docs/apache-airflow/public-airflow-interface.rst index eebc893062d4..add533f4ed2a 100644 --- a/docs/apache-airflow/public-airflow-interface.rst +++ b/docs/apache-airflow/public-airflow-interface.rst @@ -327,8 +327,8 @@ Secrets Backends ---------------- Airflow can be configured to rely on secrets backends to retrieve -:class:`~airflow.models.connection.Connection` and :class:`~airflow.models.Variables`. -All secrets backends derive from :class:`~airflow.secrets.BaseSecretsBackend`. +:class:`~airflow.models.connection.Connection` and :class:`~airflow.models.variable.Variable`. +All secrets backends derive from :class:`~airflow.secrets.base_secrets.BaseSecretsBackend`. All Secrets Backend implementations are public. You can extend their functionality: diff --git a/docs/apache-airflow/security/secrets/secrets-backend/index.rst b/docs/apache-airflow/security/secrets/secrets-backend/index.rst index 76f50a04248d..b91bef347dff 100644 --- a/docs/apache-airflow/security/secrets/secrets-backend/index.rst +++ b/docs/apache-airflow/security/secrets/secrets-backend/index.rst @@ -100,8 +100,8 @@ capability of Apache Airflow. You can see all those providers in Roll your own secrets backend ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -A secrets backend is a subclass of :py:class:`airflow.secrets.BaseSecretsBackend` and must implement either -:py:meth:`~airflow.secrets.BaseSecretsBackend.get_connection` or :py:meth:`~airflow.secrets.BaseSecretsBackend.get_conn_value`. 
+A secrets backend is a subclass of :py:class:`airflow.secrets.base_secrets.BaseSecretsBackend` and must implement either +:py:meth:`~airflow.secrets.base_secrets.BaseSecretsBackend.get_connection` or :py:meth:`~airflow.secrets.base_secrets.BaseSecretsBackend.get_conn_value`. After writing your backend class, provide the fully qualified class name in the ``backend`` key in the ``[secrets]`` section of ``airflow.cfg``. From 29ffb32057945513c4544d815f063313ba9b1ace Mon Sep 17 00:00:00 2001 From: Pankaj Koti Date: Thu, 17 Aug 2023 21:40:10 +0530 Subject: [PATCH 056/117] Use absolute path in README.md for broken links on pypi listing (#33473) Observed a couple of links are broken on our pypi project listing https://pypi.org/project/apache-airflow/. They work fine when viewed on GitHub but those relative links don't work fine on the pypi index. Also, applied a few quick-fixes that my PyCharm suggested for the README.md file (cherry picked from commit 5a37025eb0a2acc90c41a1f9701a70521f361e71) --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 41b70e132a17..f44b3d585a3b 100644 --- a/README.md +++ b/README.md @@ -105,10 +105,10 @@ MariaDB is not tested/recommended. **Note**: SQLite is used in Airflow tests. Do not use it in production. We recommend using the latest stable version of SQLite for local development. -**Note**: Airflow currently can be run on POSIX-compliant Operating Systems. For development it is regularly -tested on fairly modern Linux Distros and recent versions of MacOS. +**Note**: Airflow currently can be run on POSIX-compliant Operating Systems. For development, it is regularly +tested on fairly modern Linux Distros and recent versions of macOS. On Windows you can run it via WSL2 (Windows Subsystem for Linux 2) or via Linux Containers. -The work to add Windows support is tracked via [#10388](https://github.com/apache/airflow/issues/10388) but +The work to add Windows support is tracked via [#10388](https://github.com/apache/airflow/issues/10388), but it is not a high priority. You should only use Linux-based distros as "Production" execution environment as this is the only environment that is supported. The only distro that is used in our CI tests and that is used in the [Community managed DockerHub image](https://hub.docker.com/p/apache/airflow) is @@ -258,7 +258,7 @@ packages: is a breaking change for provider because installing the new provider might automatically upgrade Airflow (which might be an undesired side effect of upgrading provider). * **Airflow Helm Chart**: SemVer rules apply to changes in the chart only. SemVer MAJOR and MINOR - versions for the chart are independent from the Airflow version. We aim to keep backwards + versions for the chart are independent of the Airflow version. We aim to keep backwards compatibility of the Helm Chart with all released Airflow 2 versions, but some new features might only work starting from specific Airflow releases. We might however limit the Helm Chart to depend on minimal Airflow version. @@ -323,7 +323,7 @@ we publish an Apache Airflow release. Those images contain: * Base Python installation in versions supported at the time of release for the MINOR version of Airflow released (so there could be different versions for 2.3 and 2.2 line for example) * Libraries required to connect to supported Databases (again the set of databases supported depends - on the MINOR version of Airflow. 
+ on the MINOR version of Airflow) * Predefined set of popular providers (for details see the [Dockerfile](https://raw.githubusercontent.com/apache/airflow/main/Dockerfile)). * Possibility of building your own, custom image where the user can choose their own set of providers and libraries (see [Building the image](https://airflow.apache.org/docs/docker-stack/build.html)) @@ -388,11 +388,11 @@ The important dependencies are: libraries, and we should update them together * `celery`: Celery is crucial component of Airflow as it used for CeleryExecutor (and similar). Celery [follows SemVer](https://docs.celeryq.dev/en/stable/contributing.html?highlight=semver#versions), so - we should upper-bound it to the next MAJOR version. Also when we bump the upper version of the library, - we should make sure Celery Provider minimum Airflow version is updated). + we should upper-bound it to the next MAJOR version. Also, when we bump the upper version of the library, + we should make sure Celery Provider minimum Airflow version is updated. * `kubernetes`: Kubernetes is a crucial component of Airflow as it is used for the KubernetesExecutor (and similar). Kubernetes Python library [follows SemVer](https://github.com/kubernetes-client/python#compatibility), - so we should upper-bound it to the next MAJOR version. Also when we bump the upper version of the library, + so we should upper-bound it to the next MAJOR version. Also, when we bump the upper version of the library, we should make sure Kubernetes Provider minimum Airflow version is updated. ### Approach for dependencies in Airflow Providers and extras @@ -402,7 +402,7 @@ providers that extend the core functionality and are released separately, even i in the same monorepo for convenience. You can read more about the providers in the [Providers documentation](https://airflow.apache.org/docs/apache-airflow-providers/index.html). We also have set of policies implemented for maintaining and releasing community-managed providers as well -as the approach for community vs. 3rd party providers in the [providers](PROVIDERS.rst) document. +as the approach for community vs. 3rd party providers in the [providers](https://github.com/apache/airflow/blob/main/PROVIDERS.rst) document. Those `extras` and `providers` dependencies are maintained in `provider.yaml` of each provider. @@ -430,7 +430,7 @@ If you would like to become a maintainer, please review the Apache Airflow ## Can I use the Apache Airflow logo in my presentation? -Yes! Be sure to abide by the Apache Foundation [trademark policies](https://www.apache.org/foundation/marks/#books) and the Apache Airflow [Brandbook](https://cwiki.apache.org/confluence/display/AIRFLOW/Brandbook). The most up to date logos are found in [this repo](/docs/apache-airflow/img/logos) and on the Apache Software Foundation [website](https://www.apache.org/logos/about.html). +Yes! Be sure to abide by the Apache Foundation [trademark policies](https://www.apache.org/foundation/marks/#books) and the Apache Airflow [Brandbook](https://cwiki.apache.org/confluence/display/AIRFLOW/Brandbook). The most up-to-date logos are found in [this repo](https://github.com/apache/airflow/tree/main/docs/apache-airflow/img/logos/) and on the Apache Software Foundation [website](https://www.apache.org/logos/about.html). 
## Airflow merchandise From 502909253e8788530d1b83ddd51c262358c5a94c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Sat, 19 Aug 2023 20:23:00 +0000 Subject: [PATCH 057/117] Simplify code around enumerate (#33476) (cherry picked from commit bcefe6109bcabd9bd6daf8b44f7352adda1ed53d) --- airflow/models/baseoperator.py | 3 +-- airflow/providers/apache/hive/transfers/mssql_to_hive.py | 5 ++--- airflow/providers/apache/hive/transfers/vertica_to_hive.py | 5 ++--- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index 47b2aa504468..b3167fda0670 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -1730,8 +1730,7 @@ def chain(*tasks: DependencyMixin | Sequence[DependencyMixin]) -> None: :param tasks: Individual and/or list of tasks, EdgeModifiers, XComArgs, or TaskGroups to set dependencies """ - for index, up_task in enumerate(tasks[:-1]): - down_task = tasks[index + 1] + for up_task, down_task in zip(tasks, tasks[1:]): if isinstance(up_task, DependencyMixin): up_task.set_downstream(down_task) continue diff --git a/airflow/providers/apache/hive/transfers/mssql_to_hive.py b/airflow/providers/apache/hive/transfers/mssql_to_hive.py index d8039119569c..4d444f1b6914 100644 --- a/airflow/providers/apache/hive/transfers/mssql_to_hive.py +++ b/airflow/providers/apache/hive/transfers/mssql_to_hive.py @@ -118,9 +118,8 @@ def execute(self, context: Context): with NamedTemporaryFile(mode="w", encoding="utf-8") as tmp_file: csv_writer = csv.writer(tmp_file, delimiter=self.delimiter) field_dict = OrderedDict() - for col_count, field in enumerate(cursor.description, start=1): - col_position = f"Column{col_count}" - field_dict[col_position if field[0] == "" else field[0]] = self.type_map(field[1]) + for col_count, (key, val) in enumerate(cursor.description, start=1): + field_dict[key or f"Column{col_count}"] = self.type_map(val) csv_writer.writerows(cursor) tmp_file.flush() diff --git a/airflow/providers/apache/hive/transfers/vertica_to_hive.py b/airflow/providers/apache/hive/transfers/vertica_to_hive.py index 81869ed95ab3..d2b5bcf4a28e 100644 --- a/airflow/providers/apache/hive/transfers/vertica_to_hive.py +++ b/airflow/providers/apache/hive/transfers/vertica_to_hive.py @@ -122,9 +122,8 @@ def execute(self, context: Context): with NamedTemporaryFile(mode="w", encoding="utf-8") as f: csv_writer = csv.writer(f, delimiter=self.delimiter) field_dict = OrderedDict() - for col_count, field in enumerate(cursor.description, start=1): - col_position = f"Column{col_count}" - field_dict[col_position if field[0] == "" else field[0]] = self.type_map(field[1]) + for col_count, (key, val) in enumerate(cursor.description, start=1): + field_dict[key or f"Column{col_count}"] = self.type_map(val) csv_writer.writerows(cursor.iterate()) f.flush() cursor.close() From 20ebd990a70dc2c462a2e787b7baaa9bc0af74b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= <6774676+eumiro@users.noreply.github.com> Date: Thu, 17 Aug 2023 19:57:55 +0000 Subject: [PATCH 058/117] Consolidate import and usage of pandas (#33480) (cherry picked from commit 8e88eb8fa7e1fc12918dcbfcfc8ed28381008d33) --- airflow/providers/amazon/aws/transfers/sql_to_s3.py | 12 ++++++------ airflow/providers/apache/hive/hooks/hive.py | 10 +++++----- airflow/providers/google/cloud/hooks/bigquery.py | 4 ++-- airflow/providers/presto/hooks/presto.py | 6 +++--- 
airflow/providers/slack/transfers/sql_to_slack.py | 4 ++-- airflow/providers/trino/hooks/trino.py | 6 +++--- airflow/serialization/serializers/pandas.py | 8 ++++---- tests/serialization/serializers/test_serializers.py | 4 ++-- 8 files changed, 27 insertions(+), 27 deletions(-) diff --git a/airflow/providers/amazon/aws/transfers/sql_to_s3.py b/airflow/providers/amazon/aws/transfers/sql_to_s3.py index 0324406820e2..0a466a940bee 100644 --- a/airflow/providers/amazon/aws/transfers/sql_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/sql_to_s3.py @@ -29,7 +29,7 @@ from airflow.providers.common.sql.hooks.sql import DbApiHook if TYPE_CHECKING: - from pandas import DataFrame + import pandas as pd from airflow.utils.context import Context @@ -130,7 +130,7 @@ def __init__( raise AirflowException(f"The argument file_format doesn't support {file_format} value.") @staticmethod - def _fix_dtypes(df: DataFrame, file_format: FILE_FORMAT) -> None: + def _fix_dtypes(df: pd.DataFrame, file_format: FILE_FORMAT) -> None: """ Mutate DataFrame to set dtypes for float columns containing NaN values. @@ -138,7 +138,7 @@ def _fix_dtypes(df: DataFrame, file_format: FILE_FORMAT) -> None: """ try: import numpy as np - from pandas import Float64Dtype, Int64Dtype + import pandas as pd except ImportError as e: from airflow.exceptions import AirflowOptionalProviderFeatureException @@ -159,13 +159,13 @@ def _fix_dtypes(df: DataFrame, file_format: FILE_FORMAT) -> None: # The type ignore can be removed here if https://github.com/numpy/numpy/pull/23690 # is merged and released as currently NumPy does not consider None as valid for x/y. df[col] = np.where(df[col].isnull(), None, df[col]) # type: ignore[call-overload] - df[col] = df[col].astype(Int64Dtype()) + df[col] = df[col].astype(pd.Int64Dtype()) elif np.isclose(notna_series, notna_series.astype(int)).all(): # set to float dtype that retains floats and supports NaNs # The type ignore can be removed here if https://github.com/numpy/numpy/pull/23690 # is merged and released df[col] = np.where(df[col].isnull(), None, df[col]) # type: ignore[call-overload] - df[col] = df[col].astype(Float64Dtype()) + df[col] = df[col].astype(pd.Float64Dtype()) def execute(self, context: Context) -> None: sql_hook = self._get_hook() @@ -188,7 +188,7 @@ def execute(self, context: Context) -> None: filename=tmp_file.name, key=object_key, bucket_name=self.s3_bucket, replace=self.replace ) - def _partition_dataframe(self, df: DataFrame) -> Iterable[tuple[str, DataFrame]]: + def _partition_dataframe(self, df: pd.DataFrame) -> Iterable[tuple[str, pd.DataFrame]]: """Partition dataframe using pandas groupby() method.""" if not self.groupby_kwargs: yield "", df diff --git a/airflow/providers/apache/hive/hooks/hive.py b/airflow/providers/apache/hive/hooks/hive.py index debe80e29fd0..0cf85c74276c 100644 --- a/airflow/providers/apache/hive/hooks/hive.py +++ b/airflow/providers/apache/hive/hooks/hive.py @@ -31,7 +31,7 @@ from airflow.exceptions import AirflowProviderDeprecationWarning try: - import pandas + import pandas as pd except ImportError as e: from airflow.exceptions import AirflowOptionalProviderFeatureException @@ -336,7 +336,7 @@ def test_hql(self, hql: str) -> None: def load_df( self, - df: pandas.DataFrame, + df: pd.DataFrame, table: str, field_dict: dict[Any, Any] | None = None, delimiter: str = ",", @@ -361,7 +361,7 @@ def load_df( :param kwargs: passed to self.load_file """ - def _infer_field_types_from_df(df: pandas.DataFrame) -> dict[Any, Any]: + def _infer_field_types_from_df(df: 
pd.DataFrame) -> dict[Any, Any]: dtype_kind_hive_type = { "b": "BOOLEAN", # boolean "i": "BIGINT", # signed integer @@ -1037,7 +1037,7 @@ def get_pandas_df( # type: ignore schema: str = "default", hive_conf: dict[Any, Any] | None = None, **kwargs, - ) -> pandas.DataFrame: + ) -> pd.DataFrame: """ Get a pandas dataframe from a Hive query. @@ -1056,5 +1056,5 @@ def get_pandas_df( # type: ignore :return: pandas.DateFrame """ res = self.get_results(sql, schema=schema, hive_conf=hive_conf) - df = pandas.DataFrame(res["data"], columns=[c[0] for c in res["header"]], **kwargs) + df = pd.DataFrame(res["data"], columns=[c[0] for c in res["header"]], **kwargs) return df diff --git a/airflow/providers/google/cloud/hooks/bigquery.py b/airflow/providers/google/cloud/hooks/bigquery.py index 2ac6c2645e20..44185963b568 100644 --- a/airflow/providers/google/cloud/hooks/bigquery.py +++ b/airflow/providers/google/cloud/hooks/bigquery.py @@ -30,6 +30,7 @@ from datetime import datetime, timedelta from typing import Any, Iterable, Mapping, NoReturn, Sequence, Union, cast +import pandas as pd from aiohttp import ClientSession as ClientSession from gcloud.aio.bigquery import Job, Table as Table_async from google.api_core.page_iterator import HTTPIterator @@ -49,7 +50,6 @@ from google.cloud.bigquery.table import EncryptionConfiguration, Row, RowIterator, Table, TableReference from google.cloud.exceptions import NotFound from googleapiclient.discovery import Resource, build -from pandas import DataFrame from pandas_gbq import read_gbq from pandas_gbq.gbq import GbqConnector # noqa from requests import Session @@ -244,7 +244,7 @@ def get_pandas_df( parameters: Iterable | Mapping[str, Any] | None = None, dialect: str | None = None, **kwargs, - ) -> DataFrame: + ) -> pd.DataFrame: """Get a Pandas DataFrame for the BigQuery results. 
The DbApiHook method must be overridden because Pandas doesn't support diff --git a/airflow/providers/presto/hooks/presto.py b/airflow/providers/presto/hooks/presto.py index 7ce902180789..028deb48ed1e 100644 --- a/airflow/providers/presto/hooks/presto.py +++ b/airflow/providers/presto/hooks/presto.py @@ -158,7 +158,7 @@ def get_first( raise PrestoException(e) def get_pandas_df(self, sql: str = "", parameters=None, **kwargs): - import pandas + import pandas as pd cursor = self.get_cursor() try: @@ -168,10 +168,10 @@ def get_pandas_df(self, sql: str = "", parameters=None, **kwargs): raise PrestoException(e) column_descriptions = cursor.description if data: - df = pandas.DataFrame(data, **kwargs) + df = pd.DataFrame(data, **kwargs) df.columns = [c[0] for c in column_descriptions] else: - df = pandas.DataFrame(**kwargs) + df = pd.DataFrame(**kwargs) return df def insert_rows( diff --git a/airflow/providers/slack/transfers/sql_to_slack.py b/airflow/providers/slack/transfers/sql_to_slack.py index 97017c80d7e9..ba72e689ee48 100644 --- a/airflow/providers/slack/transfers/sql_to_slack.py +++ b/airflow/providers/slack/transfers/sql_to_slack.py @@ -19,7 +19,7 @@ from tempfile import NamedTemporaryFile from typing import TYPE_CHECKING, Any, Iterable, Mapping, Sequence -from pandas import DataFrame +import pandas as pd from tabulate import tabulate from airflow.exceptions import AirflowException @@ -70,7 +70,7 @@ def _get_hook(self) -> DbApiHook: ) return hook - def _get_query_results(self) -> DataFrame: + def _get_query_results(self) -> pd.DataFrame: sql_hook = self._get_hook() self.log.info("Running SQL query: %s", self.sql) diff --git a/airflow/providers/trino/hooks/trino.py b/airflow/providers/trino/hooks/trino.py index 5144978dab33..14461b727d99 100644 --- a/airflow/providers/trino/hooks/trino.py +++ b/airflow/providers/trino/hooks/trino.py @@ -178,7 +178,7 @@ def get_first( def get_pandas_df( self, sql: str = "", parameters: Iterable | Mapping[str, Any] | None = None, **kwargs ): # type: ignore[override] - import pandas + import pandas as pd cursor = self.get_cursor() try: @@ -188,10 +188,10 @@ def get_pandas_df( raise TrinoException(e) column_descriptions = cursor.description if data: - df = pandas.DataFrame(data, **kwargs) + df = pd.DataFrame(data, **kwargs) df.columns = [c[0] for c in column_descriptions] else: - df = pandas.DataFrame(**kwargs) + df = pd.DataFrame(**kwargs) return df def insert_rows( diff --git a/airflow/serialization/serializers/pandas.py b/airflow/serialization/serializers/pandas.py index 0fd9ae04dc9c..efdc8e11da41 100644 --- a/airflow/serialization/serializers/pandas.py +++ b/airflow/serialization/serializers/pandas.py @@ -28,7 +28,7 @@ deserializers = serializers if TYPE_CHECKING: - from pandas import DataFrame + import pandas as pd from airflow.serialization.serde import U @@ -36,11 +36,11 @@ def serialize(o: object) -> tuple[U, str, int, bool]: + import pandas as pd import pyarrow as pa - from pandas import DataFrame from pyarrow import parquet as pq - if not isinstance(o, DataFrame): + if not isinstance(o, pd.DataFrame): return "", "", 0, False # for now, we *always* serialize into in memory @@ -53,7 +53,7 @@ def serialize(o: object) -> tuple[U, str, int, bool]: return buf.getvalue().hex().decode("utf-8"), qualname(o), __version__, True -def deserialize(classname: str, version: int, data: object) -> DataFrame: +def deserialize(classname: str, version: int, data: object) -> pd.DataFrame: if version > __version__: raise TypeError(f"serialized {version} of {classname} > 
{__version__}") diff --git a/tests/serialization/serializers/test_serializers.py b/tests/serialization/serializers/test_serializers.py index 79000bea17d7..e9805d4d777f 100644 --- a/tests/serialization/serializers/test_serializers.py +++ b/tests/serialization/serializers/test_serializers.py @@ -20,7 +20,7 @@ import decimal import numpy -import pandas +import pandas as pd import pendulum.tz import pytest from pendulum import DateTime @@ -94,7 +94,7 @@ def test_params(self): assert i["x"] == d["x"] def test_pandas(self): - i = pandas.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + i = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) e = serialize(i) d = deserialize(e) assert i.equals(d) From ea432df49a0a9e1a2af0541a4413178e196847c3 Mon Sep 17 00:00:00 2001 From: Hussein Awala Date: Fri, 18 Aug 2023 08:46:59 +0200 Subject: [PATCH 059/117] Fix a bug in formatDuration method (#33486) (cherry picked from commit eed09daad3278706da29c8f172899e67ecadc40a) --- airflow/www/static/js/cluster-activity/nav/FilterBar.tsx | 2 +- airflow/www/static/js/datetime_utils.js | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/airflow/www/static/js/cluster-activity/nav/FilterBar.tsx b/airflow/www/static/js/cluster-activity/nav/FilterBar.tsx index 6cae7854f184..b4f840cb5fc6 100644 --- a/airflow/www/static/js/cluster-activity/nav/FilterBar.tsx +++ b/airflow/www/static/js/cluster-activity/nav/FilterBar.tsx @@ -81,7 +81,7 @@ const FilterBar = () => { - over the last{" "} + for a period of{" "} {formatDuration(getDuration(formattedStartDate, formattedEndDate))} diff --git a/airflow/www/static/js/datetime_utils.js b/airflow/www/static/js/datetime_utils.js index c23f8d665e83..5eb1f1defb8f 100644 --- a/airflow/www/static/js/datetime_utils.js +++ b/airflow/www/static/js/datetime_utils.js @@ -120,7 +120,8 @@ export const getDuration = (startDate, endDate) => export const formatDuration = (dur) => { const duration = moment.duration(dur); - const days = duration.days(); + const totalDays = duration.asDays(); + const days = Math.floor(totalDays); // .as('milliseconds') is necessary for .format() to work correctly return `${days > 0 ? 
`${days}d` : ""}${moment .utc(duration.as("milliseconds")) From 5c8e799d928f3223569e1a268e318b3f7f8b5828 Mon Sep 17 00:00:00 2001 From: Pierre Jeambrun Date: Fri, 18 Aug 2023 19:09:10 +0200 Subject: [PATCH 060/117] Do not return ongoin dagrun when a end_date is less than utcnow (#33488) (cherry picked from commit 7c51c87503004d57fe726dae2ab830a5ffd9b64b) --- .../static/js/cluster-activity/useFilters.tsx | 5 +++- airflow/www/views.py | 13 +++++----- .../www/views/test_views_cluster_activity.py | 25 +++++++++++++------ 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/airflow/www/static/js/cluster-activity/useFilters.tsx b/airflow/www/static/js/cluster-activity/useFilters.tsx index 6e30683e3a5c..2e1c415223d2 100644 --- a/airflow/www/static/js/cluster-activity/useFilters.tsx +++ b/airflow/www/static/js/cluster-activity/useFilters.tsx @@ -49,7 +49,10 @@ export const now = date.toISOString(); const useFilters = (): FilterHookReturn => { const [searchParams, setSearchParams] = useSearchParams(); - const endDate = searchParams.get(END_DATE_PARAM) || now; + const endDate = + searchParams.get(END_DATE_PARAM) || + // @ts-ignore + moment(now).add(1, "h").toISOString(); const startDate = searchParams.get(START_DATE_PARAM) || // @ts-ignore diff --git a/airflow/www/views.py b/airflow/www/views.py index 683fabc77f45..0d230fed6fdc 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -67,7 +67,7 @@ from markupsafe import Markup, escape from pendulum.datetime import DateTime from pendulum.parsing.exceptions import ParserError -from sqlalchemy import Date, and_, case, desc, func, inspect, or_, select, union_all +from sqlalchemy import Date, and_, case, desc, func, inspect, select, union_all from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session, joinedload from wtforms import BooleanField, validators @@ -3739,13 +3739,14 @@ def historical_metrics_data(self): """Returns cluster activity historical metrics.""" start_date = _safe_parse_datetime(request.args.get("start_date")) end_date = _safe_parse_datetime(request.args.get("end_date")) + with create_session() as session: # DagRuns - dag_runs_type = session.execute( + dag_run_types = session.execute( select(DagRun.run_type, func.count(DagRun.run_id)) .where( DagRun.start_date >= start_date, - or_(DagRun.end_date.is_(None), DagRun.end_date <= end_date), + func.coalesce(DagRun.end_date, datetime.datetime.utcnow()) <= end_date, ) .group_by(DagRun.run_type) ).all() @@ -3754,7 +3755,7 @@ def historical_metrics_data(self): select(DagRun.state, func.count(DagRun.run_id)) .where( DagRun.start_date >= start_date, - or_(DagRun.end_date.is_(None), DagRun.end_date <= end_date), + func.coalesce(DagRun.end_date, datetime.datetime.utcnow()) <= end_date, ) .group_by(DagRun.state) ).all() @@ -3765,7 +3766,7 @@ def historical_metrics_data(self): .join(TaskInstance.dag_run) .where( DagRun.start_date >= start_date, - or_(DagRun.end_date.is_(None), DagRun.end_date <= end_date), + func.coalesce(DagRun.end_date, datetime.datetime.utcnow()) <= end_date, ) .group_by(TaskInstance.state) ).all() @@ -3773,7 +3774,7 @@ def historical_metrics_data(self): data = { "dag_run_types": { **{dag_run_type.value: 0 for dag_run_type in DagRunType}, - **{run_type: sum_value for run_type, sum_value in dag_runs_type}, + **{run_type: sum_value for run_type, sum_value in dag_run_types}, }, "dag_run_states": { **{dag_run_state.value: 0 for dag_run_state in DagRunState}, diff --git a/tests/www/views/test_views_cluster_activity.py 
b/tests/www/views/test_views_cluster_activity.py index 13105eb28a11..1825a1d68062 100644 --- a/tests/www/views/test_views_cluster_activity.py +++ b/tests/www/views/test_views_cluster_activity.py @@ -49,7 +49,7 @@ def freeze_time_for_dagruns(time_machine): @pytest.fixture -def make_dag_runs(dag_maker, session): +def make_dag_runs(dag_maker, session, time_machine): with dag_maker( dag_id="test_dag_id", serialized=True, @@ -76,29 +76,40 @@ def make_dag_runs(dag_maker, session): start_date=dag_maker.dag.next_dagrun_info(date).logical_date, ) + run3 = dag_maker.create_dagrun( + run_id="run_3", + state=DagRunState.RUNNING, + run_type=DagRunType.SCHEDULED, + execution_date=pendulum.DateTime(2023, 2, 3, 0, 0, 0, tzinfo=pendulum.UTC), + start_date=pendulum.DateTime(2023, 2, 3, 0, 0, 0, tzinfo=pendulum.UTC), + ) + run3.end_date = None + for ti in run1.task_instances: ti.state = TaskInstanceState.SUCCESS for ti in run2.task_instances: ti.state = TaskInstanceState.FAILED + time_machine.move_to("2023-07-02T00:00:00+00:00", tick=False) + session.flush() @pytest.mark.usefixtures("freeze_time_for_dagruns", "make_dag_runs") -def test_historical_metrics_data(admin_client, session): +def test_historical_metrics_data(admin_client, session, time_machine): resp = admin_client.get( - "/object/historical_metrics_data?start_date=2023-01-01T00:00&end_date=2023-05-02T00:00", + "/object/historical_metrics_data?start_date=2023-01-01T00:00&end_date=2023-08-02T00:00", follow_redirects=True, ) assert resp.status_code == 200 assert resp.json == { - "dag_run_states": {"failed": 1, "queued": 0, "running": 0, "success": 1}, - "dag_run_types": {"backfill": 0, "dataset_triggered": 1, "manual": 0, "scheduled": 1}, + "dag_run_states": {"failed": 1, "queued": 0, "running": 1, "success": 1}, + "dag_run_types": {"backfill": 0, "dataset_triggered": 1, "manual": 0, "scheduled": 2}, "task_instance_states": { "deferred": 0, "failed": 2, - "no_status": 0, + "no_status": 2, "queued": 0, "removed": 0, "restarting": 0, @@ -117,7 +128,7 @@ def test_historical_metrics_data(admin_client, session): @pytest.mark.usefixtures("freeze_time_for_dagruns", "make_dag_runs") def test_historical_metrics_data_date_filters(admin_client, session): resp = admin_client.get( - "/object/historical_metrics_data?start_date=2023-02-02T00:00&end_date=2023-05-02T00:00", + "/object/historical_metrics_data?start_date=2023-02-02T00:00&end_date=2023-06-02T00:00", follow_redirects=True, ) assert resp.status_code == 200 From e2578691ba644e9149a5de88abe469af7c18177a Mon Sep 17 00:00:00 2001 From: Vijayasarathi Balasubramanian Date: Fri, 18 Aug 2023 13:54:49 -0400 Subject: [PATCH 061/117] Fix broken link in Modules Management page (#33499) fixing broken link in Modules Management (cherry picked from commit 02d3378fc937bacabed307b5b9509b729683bf4a) --- .../administration-and-deployment/modules_management.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/apache-airflow/administration-and-deployment/modules_management.rst b/docs/apache-airflow/administration-and-deployment/modules_management.rst index 6ee9f5b65029..44b360fbc83e 100644 --- a/docs/apache-airflow/administration-and-deployment/modules_management.rst +++ b/docs/apache-airflow/administration-and-deployment/modules_management.rst @@ -338,7 +338,7 @@ for creating and distributing Python packages. ``wheel``: The wheel package provides a bdist_wheel command for setuptools. It creates .whl file which is directly installable through the ``pip install`` -command. 
We can then upload the same file to `PyPI `_. +command. We can then upload the same file to `PyPI `_. .. code-block:: bash From 7eeba7c3a660f8fef437ce694ca931e0188130fb Mon Sep 17 00:00:00 2001 From: Andrey Anshin Date: Fri, 18 Aug 2023 23:40:52 +0400 Subject: [PATCH 062/117] Use `dialect.name` in custom SA types (#33503) * Use `dialect.name` in custom SA types * Fix removed import (cherry picked from commit 46aa4294e453d800ef6d327addf72a004be3765f) --- airflow/utils/sqlalchemy.py | 39 ++++++++++++++----------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/airflow/utils/sqlalchemy.py b/airflow/utils/sqlalchemy.py index 38716d4eb5f5..b4b726ca327a 100644 --- a/airflow/utils/sqlalchemy.py +++ b/airflow/utils/sqlalchemy.py @@ -30,23 +30,22 @@ from sqlalchemy.dialects import mssql, mysql from sqlalchemy.exc import OperationalError from sqlalchemy.sql import ColumnElement, Select -from sqlalchemy.sql.expression import ColumnOperators from sqlalchemy.types import JSON, Text, TypeDecorator, TypeEngine, UnicodeText from airflow import settings from airflow.configuration import conf from airflow.serialization.enums import Encoding +from airflow.utils.timezone import make_naive if TYPE_CHECKING: from kubernetes.client.models.v1_pod import V1Pod from sqlalchemy.orm import Query, Session + from sqlalchemy.sql.expression import ColumnOperators log = logging.getLogger(__name__) utc = pendulum.tz.timezone("UTC") -using_mysql = conf.get_mandatory_value("database", "sql_alchemy_conn").lower().startswith("mysql") - class UtcDateTime(TypeDecorator): """ @@ -67,22 +66,18 @@ class UtcDateTime(TypeDecorator): cache_ok = True def process_bind_param(self, value, dialect): - if value is not None: - if not isinstance(value, datetime.datetime): - raise TypeError("expected datetime.datetime, not " + repr(value)) - elif value.tzinfo is None: - raise ValueError("naive datetime is disallowed") + if not isinstance(value, datetime.datetime): + if value is None: + return None + raise TypeError("expected datetime.datetime, not " + repr(value)) + elif value.tzinfo is None: + raise ValueError("naive datetime is disallowed") + elif dialect.name == "mysql": # For mysql we should store timestamps as naive values - # Timestamp in MYSQL is not timezone aware. In MySQL 5.6 - # timezone added at the end is ignored but in MySQL 5.7 - # inserting timezone value fails with 'invalid-date' + # In MySQL 5.7 inserting timezone value fails with 'invalid-date' # See https://issues.apache.org/jira/browse/AIRFLOW-7001 - if using_mysql: - from airflow.utils.timezone import make_naive - - return make_naive(value, timezone=utc) - return value.astimezone(utc) - return None + return make_naive(value, timezone=utc) + return value.astimezone(utc) def process_result_value(self, value, dialect): """ @@ -119,12 +114,8 @@ class ExtendedJSON(TypeDecorator): cache_ok = True - def db_supports_json(self): - """Check if the database supports JSON (i.e. 
is NOT MSSQL).""" - return not conf.get("database", "sql_alchemy_conn").startswith("mssql") - def load_dialect_impl(self, dialect) -> TypeEngine: - if self.db_supports_json(): + if dialect.name != "mssql": return dialect.type_descriptor(JSON) return dialect.type_descriptor(UnicodeText) @@ -138,7 +129,7 @@ def process_bind_param(self, value, dialect): value = BaseSerialization.serialize(value) # Then, if the database does not have native JSON support, encode it again as a string - if not self.db_supports_json(): + if dialect.name == "mssql": value = json.dumps(value) return value @@ -150,7 +141,7 @@ def process_result_value(self, value, dialect): return None # Deserialize from a string first if needed - if not self.db_supports_json(): + if dialect.name == "mssql": value = json.loads(value) return BaseSerialization.deserialize(value) From 3f8c35246fa2de012a4b8f1af3c1fb2afba2a2fe Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Fri, 18 Aug 2023 21:54:32 +0100 Subject: [PATCH 063/117] Update version_added field for configs in config file (#33509) The otel was 2.6.0 as can be seen here: https://github.com/apache/airflow/pull/30160 and the internal api and related were 2.6.0 even though they were under a flag. These need to be updated except we want to modify the check tool to skip these ones (cherry picked from commit ba4f541fba656acb6c35469c97a1e1bfc79f2d96) --- airflow/config_templates/config.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index ec386f7158d3..cdc626086d22 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -73,7 +73,7 @@ core: auth_manager: description: | The auth manager class that airflow should use. Full import path to the auth manager class. - version_added: ~ + version_added: 2.7.0 type: string example: ~ default: "airflow.auth.managers.fab.fab_auth_manager.FabAuthManager" @@ -446,14 +446,14 @@ core: example: '{"some_param": "some_value"}' database_access_isolation: description: (experimental) Whether components should use Airflow Internal API for DB connectivity. - version_added: 2.7.0 + version_added: 2.6.0 type: boolean example: ~ default: "False" internal_api_url: description: | (experimental) Airflow Internal API url. Only used if [core] database_access_isolation is True. - version_added: 2.7.0 + version_added: 2.6.0 type: string default: ~ example: 'http://localhost:8080' @@ -919,7 +919,7 @@ logging: With this option AirFlow will create new handlers and send low level logs like INFO and WARNING to stdout, while sending higher severity logs to stderr. - version_added: 2.5.4 + version_added: 2.7.0 type: boolean example: ~ default: "False" @@ -1031,31 +1031,31 @@ metrics: otel_on: description: | Enables sending metrics to OpenTelemetry. 
- version_added: 2.5.1 + version_added: 2.6.0 type: string example: ~ default: "False" otel_host: description: ~ - version_added: 2.5.1 + version_added: 2.6.0 type: string example: ~ default: "localhost" otel_port: description: ~ - version_added: 2.5.1 + version_added: 2.6.0 type: string example: ~ default: "8889" otel_prefix: description: ~ - version_added: 2.0.0 + version_added: 2.6.0 type: string example: ~ default: "airflow" otel_interval_milliseconds: description: ~ - version_added: 2.0.0 + version_added: 2.6.0 type: integer example: ~ default: "60000" @@ -1276,7 +1276,7 @@ operators: default_deferrable: description: | The default value of attribute "deferrable" in operators and sensors. - version_added: ~ + version_added: 2.7.0 type: boolean example: ~ default: "false" @@ -1766,7 +1766,7 @@ webserver: run_internal_api: description: | Boolean for running Internal API in the webserver. - version_added: 2.7.0 + version_added: 2.6.0 type: boolean example: ~ default: "False" From dc73e786fe2f675bfa1a125886f5da40d56f899d Mon Sep 17 00:00:00 2001 From: Kaxil Naik Date: Fri, 18 Aug 2023 21:25:53 +0100 Subject: [PATCH 064/117] Fix typo in `db upgrade` warning message (#33510) `db updgrade` to `db upgrade` (cherry picked from commit bfba17bb37feea74247a06aaa65560d125247fee) --- airflow/cli/commands/db_command.py | 2 +- tests/cli/commands/test_db_command.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/cli/commands/db_command.py b/airflow/cli/commands/db_command.py index f0d3ac4edc94..dad068547c23 100644 --- a/airflow/cli/commands/db_command.py +++ b/airflow/cli/commands/db_command.py @@ -61,7 +61,7 @@ def resetdb(args): def upgradedb(args): """Upgrades the metadata database.""" - warnings.warn("`db updgrade` is deprecated. Use `db migrate` instead.", DeprecationWarning) + warnings.warn("`db upgrade` is deprecated. 
Use `db migrate` instead.", DeprecationWarning) migratedb(args) diff --git a/tests/cli/commands/test_db_command.py b/tests/cli/commands/test_db_command.py index e928dc66bf24..4bfb80d4f5ab 100644 --- a/tests/cli/commands/test_db_command.py +++ b/tests/cli/commands/test_db_command.py @@ -127,7 +127,7 @@ def test_cli_sync_failure(self, mock_upgradedb, args, pattern): @mock.patch("airflow.cli.commands.db_command.migratedb") def test_cli_upgrade(self, mock_migratedb): - with pytest.warns(expected_warning=DeprecationWarning, match="`db updgrade` is deprecated"): + with pytest.warns(expected_warning=DeprecationWarning, match="`db upgrade` is deprecated"): db_command.upgradedb(self.parser.parse_args(["db", "upgrade"])) mock_migratedb.assert_called_once() From a3fa682e8ca7133cbddb0bd7775f5775488cd2ec Mon Sep 17 00:00:00 2001 From: Hussein Awala Date: Sun, 20 Aug 2023 20:12:21 +0200 Subject: [PATCH 065/117] Set strict to True when parsing dates in webserver views (#33512) Co-authored-by: Tzu-ping Chung --------- Co-authored-by: Tzu-ping Chung (cherry picked from commit 4390524a41fdfd2d57f1d2dc98ad7b4009c8399e) --- airflow/utils/timezone.py | 5 +++-- airflow/www/views.py | 11 ++++++----- tests/www/views/test_views_extra_links.py | 22 ++++++++++++---------- tests/www/views/test_views_log.py | 3 ++- tests/www/views/test_views_tasks.py | 3 ++- 5 files changed, 25 insertions(+), 19 deletions(-) diff --git a/airflow/utils/timezone.py b/airflow/utils/timezone.py index f7116cd81f98..12c75bef5976 100644 --- a/airflow/utils/timezone.py +++ b/airflow/utils/timezone.py @@ -194,16 +194,17 @@ def datetime(*args, **kwargs): return dt.datetime(*args, **kwargs) -def parse(string: str, timezone=None) -> DateTime: +def parse(string: str, timezone=None, *, strict=False) -> DateTime: """ Parse a time string and return an aware datetime. :param string: time string :param timezone: the timezone + :param strict: if False, it will fall back on the dateutil parser if unable to parse with pendulum """ from airflow.settings import TIMEZONE - return pendulum.parse(string, tz=timezone or TIMEZONE, strict=False) # type: ignore + return pendulum.parse(string, tz=timezone or TIMEZONE, strict=strict) # type: ignore @overload diff --git a/airflow/www/views.py b/airflow/www/views.py index 0d230fed6fdc..0be5226734b0 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -265,17 +265,18 @@ def get_date_time_num_runs_dag_runs_form_data(www_request, session, dag): } -def _safe_parse_datetime(v, allow_empty=False) -> datetime.datetime | None: +def _safe_parse_datetime(v, *, allow_empty=False, strict=True) -> datetime.datetime | None: """ Parse datetime and return error message for invalid dates. :param v: the string value to be parsed :param allow_empty: Set True to return none if empty str or None + :param strict: if False, it will fall back on the dateutil parser if unable to parse with pendulum """ if allow_empty is True and not v: return None try: - return timezone.parse(v) + return timezone.parse(v, strict=strict) except (TypeError, ParserError): abort(400, f"Invalid datetime: {v!r}") @@ -1630,7 +1631,7 @@ def get_logs_with_metadata(self, session: Session = NEW_SESSION): # Convert string datetime into actual datetime try: - execution_date = timezone.parse(execution_date_str) + execution_date = timezone.parse(execution_date_str, strict=True) except ValueError: error_message = ( f"Given execution date, {execution_date}, could not be identified as a date. 
" @@ -2103,7 +2104,7 @@ def trigger(self, dag_id: str, session: Session = NEW_SESSION): ) try: - execution_date = timezone.parse(request_execution_date) + execution_date = timezone.parse(request_execution_date, strict=True) except ParserError: flash("Invalid execution date", "error") form = DateTimeForm(data={"execution_date": timezone.utcnow().isoformat()}) @@ -3699,7 +3700,7 @@ def grid_data(self): num_runs = conf.getint("webserver", "default_dag_run_display_number") try: - base_date = timezone.parse(request.args["base_date"]) + base_date = timezone.parse(request.args["base_date"], strict=True) except (KeyError, ValueError): base_date = dag.get_latest_execution_date() or timezone.utcnow() diff --git a/tests/www/views/test_views_extra_links.py b/tests/www/views/test_views_extra_links.py index 9b6ec14d352f..acd1dcee7023 100644 --- a/tests/www/views/test_views_extra_links.py +++ b/tests/www/views/test_views_extra_links.py @@ -18,6 +18,7 @@ from __future__ import annotations import json +import urllib.parse from unittest import mock import pytest @@ -30,7 +31,8 @@ from tests.test_utils.db import clear_db_runs from tests.test_utils.mock_operators import AirflowLink, Dummy2TestOperator, Dummy3TestOperator -DEFAULT_DATE = timezone.datetime(2017, 1, 1) +DEFAULT_DATE = timezone.datetime(2017, 1, 1, tzinfo=timezone.utc) +STR_DEFAULT_DATE = urllib.parse.quote(DEFAULT_DATE.strftime("%Y-%m-%dT%H:%M:%S.%f%z")) ENDPOINT = "extra_links" @@ -129,7 +131,7 @@ def reset_task_instances(): def test_extra_links_works(dag_run, task_1, viewer_client, session): response = viewer_client.get( f"{ENDPOINT}?dag_id={task_1.dag_id}&task_id={task_1.task_id}" - f"&execution_date={DEFAULT_DATE}&link_name=foo-bar", + f"&execution_date={STR_DEFAULT_DATE}&link_name=foo-bar", follow_redirects=True, ) @@ -143,7 +145,7 @@ def test_extra_links_works(dag_run, task_1, viewer_client, session): def test_global_extra_links_works(dag_run, task_1, viewer_client, session): response = viewer_client.get( f"{ENDPOINT}?dag_id={dag_run.dag_id}&task_id={task_1.task_id}" - f"&execution_date={DEFAULT_DATE}&link_name=github", + f"&execution_date={STR_DEFAULT_DATE}&link_name=github", follow_redirects=True, ) @@ -157,7 +159,7 @@ def test_global_extra_links_works(dag_run, task_1, viewer_client, session): def test_operator_extra_link_override_global_extra_link(dag_run, task_1, viewer_client): response = viewer_client.get( f"{ENDPOINT}?dag_id={task_1.dag_id}&task_id={task_1.task_id}" - f"&execution_date={DEFAULT_DATE}&link_name=airflow", + f"&execution_date={STR_DEFAULT_DATE}&link_name=airflow", follow_redirects=True, ) @@ -171,7 +173,7 @@ def test_operator_extra_link_override_global_extra_link(dag_run, task_1, viewer_ def test_extra_links_error_raised(dag_run, task_1, viewer_client): response = viewer_client.get( f"{ENDPOINT}?dag_id={task_1.dag_id}&task_id={task_1.task_id}" - f"&execution_date={DEFAULT_DATE}&link_name=raise_error", + f"&execution_date={STR_DEFAULT_DATE}&link_name=raise_error", follow_redirects=True, ) @@ -185,7 +187,7 @@ def test_extra_links_error_raised(dag_run, task_1, viewer_client): def test_extra_links_no_response(dag_run, task_1, viewer_client): response = viewer_client.get( f"{ENDPOINT}?dag_id={task_1.dag_id}&task_id={task_1.task_id}" - f"&execution_date={DEFAULT_DATE}&link_name=no_response", + f"&execution_date={STR_DEFAULT_DATE}&link_name=no_response", follow_redirects=True, ) @@ -206,7 +208,7 @@ def test_operator_extra_link_override_plugin(dag_run, task_2, viewer_client): """ response = viewer_client.get( 
f"{ENDPOINT}?dag_id={task_2.dag_id}&task_id={task_2.task_id}" - f"&execution_date={DEFAULT_DATE}&link_name=airflow", + f"&execution_date={STR_DEFAULT_DATE}&link_name=airflow", follow_redirects=True, ) @@ -228,7 +230,7 @@ def test_operator_extra_link_multiple_operators(dag_run, task_2, task_3, viewer_ """ response = viewer_client.get( f"{ENDPOINT}?dag_id={task_2.dag_id}&task_id={task_2.task_id}" - f"&execution_date={DEFAULT_DATE}&link_name=airflow", + f"&execution_date={STR_DEFAULT_DATE}&link_name=airflow", follow_redirects=True, ) @@ -240,7 +242,7 @@ def test_operator_extra_link_multiple_operators(dag_run, task_2, task_3, viewer_ response = viewer_client.get( f"{ENDPOINT}?dag_id={task_3.dag_id}&task_id={task_3.task_id}" - f"&execution_date={DEFAULT_DATE}&link_name=airflow", + f"&execution_date={STR_DEFAULT_DATE}&link_name=airflow", follow_redirects=True, ) @@ -253,7 +255,7 @@ def test_operator_extra_link_multiple_operators(dag_run, task_2, task_3, viewer_ # Also check that the other Operator Link defined for this operator exists response = viewer_client.get( f"{ENDPOINT}?dag_id={task_3.dag_id}&task_id={task_3.task_id}" - f"&execution_date={DEFAULT_DATE}&link_name=google", + f"&execution_date={STR_DEFAULT_DATE}&link_name=google", follow_redirects=True, ) diff --git a/tests/www/views/test_views_log.py b/tests/www/views/test_views_log.py index 207e9a3d89fd..aa8b583546de 100644 --- a/tests/www/views/test_views_log.py +++ b/tests/www/views/test_views_log.py @@ -49,7 +49,8 @@ DAG_ID_REMOVED = "removed_dag_for_testing_log_view" TASK_ID = "task_for_testing_log_view" DEFAULT_DATE = timezone.datetime(2017, 9, 1) -ENDPOINT = f"log?dag_id={DAG_ID}&task_id={TASK_ID}&execution_date={DEFAULT_DATE}" +STR_DEFAULT_DATE = urllib.parse.quote(DEFAULT_DATE.strftime("%Y-%m-%dT%H:%M:%S.%f%z")) +ENDPOINT = f"log?dag_id={DAG_ID}&task_id={TASK_ID}&execution_date={STR_DEFAULT_DATE}" @pytest.fixture(scope="module", autouse=True) diff --git a/tests/www/views/test_views_tasks.py b/tests/www/views/test_views_tasks.py index c7f10d2747cd..01fdc820aea0 100644 --- a/tests/www/views/test_views_tasks.py +++ b/tests/www/views/test_views_tasks.py @@ -46,6 +46,7 @@ from tests.test_utils.www import check_content_in_response, check_content_not_in_response, client_with_login DEFAULT_DATE = timezone.utcnow().replace(hour=0, minute=0, second=0, microsecond=0) +STR_DEFAULT_DATE = urllib.parse.quote(DEFAULT_DATE.strftime("%Y-%m-%dT%H:%M:%S.%f%z")) DEFAULT_VAL = urllib.parse.quote_plus(str(DEFAULT_DATE)) @@ -1041,7 +1042,7 @@ def test_graph_view_doesnt_fail_on_recursion_error(app, dag_maker, admin_client) def test_task_instances(admin_client): """Test task_instances view.""" resp = admin_client.get( - f"/object/task_instances?dag_id=example_bash_operator&execution_date={DEFAULT_DATE}", + f"/object/task_instances?dag_id=example_bash_operator&execution_date={STR_DEFAULT_DATE}", follow_redirects=True, ) assert resp.status_code == 200 From f4c2b58ec88a962234514a5fc97a232ff1f9ebdf Mon Sep 17 00:00:00 2001 From: Jed Cunningham <66968678+jedcunningham@users.noreply.github.com> Date: Sat, 19 Aug 2023 01:54:31 -0600 Subject: [PATCH 066/117] Move license templates out of repo root (#33515) (cherry picked from commit f89af2163fdc907aac8ebb9d1d0ddadf6e5c4806) --- .pre-commit-config.yaml | 22 +++++++++---------- .../ci/license-templates}/LICENSE.rst | 0 .../ci/license-templates}/LICENSE.txt | 0 ...eck_providers_subpackages_all_have_init.py | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) rename {license-templates => 
scripts/ci/license-templates}/LICENSE.rst (100%) rename {license-templates => scripts/ci/license-templates}/LICENSE.txt (100%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4ecfebb09c8f..dd2af4e15744 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ repos: - --comment-style - "/*||*/" - --license-filepath - - license-templates/LICENSE.txt + - scripts/ci/license-templates/LICENSE.txt - --fuzzy-match-generates-todo - id: insert-license name: Add license for all RST files @@ -61,7 +61,7 @@ repos: - --comment-style - "||" - --license-filepath - - license-templates/LICENSE.rst + - scripts/ci/license-templates/LICENSE.rst - --fuzzy-match-generates-todo files: \.rst$ - id: insert-license @@ -72,7 +72,7 @@ repos: - --comment-style - "/*!| *| */" - --license-filepath - - license-templates/LICENSE.txt + - scripts/ci/license-templates/LICENSE.txt - --fuzzy-match-generates-todo - id: insert-license name: Add license for all JINJA template files @@ -82,7 +82,7 @@ repos: - --comment-style - "{#||#}" - --license-filepath - - license-templates/LICENSE.txt + - scripts/ci/license-templates/LICENSE.txt - --fuzzy-match-generates-todo - id: insert-license name: Add license for all Shell files @@ -92,7 +92,7 @@ repos: - --comment-style - "|#|" - --license-filepath - - license-templates/LICENSE.txt + - scripts/ci/license-templates/LICENSE.txt - --fuzzy-match-generates-todo - id: insert-license name: Add license for all Python files @@ -102,7 +102,7 @@ repos: - --comment-style - "|#|" - --license-filepath - - license-templates/LICENSE.txt + - scripts/ci/license-templates/LICENSE.txt - --fuzzy-match-generates-todo - id: insert-license name: Add license for all XML files @@ -112,7 +112,7 @@ repos: - --comment-style - "" - --license-filepath - - license-templates/LICENSE.txt + - scripts/ci/license-templates/LICENSE.txt - --fuzzy-match-generates-todo - id: insert-license name: Add license for all Helm template files @@ -121,7 +121,7 @@ repos: - --comment-style - "{{/*||*/}}" - --license-filepath - - license-templates/LICENSE.txt + - scripts/ci/license-templates/LICENSE.txt - --fuzzy-match-generates-todo - id: insert-license name: Add license for all YAML files except Helm templates @@ -132,7 +132,7 @@ repos: - --comment-style - "|#|" - --license-filepath - - license-templates/LICENSE.txt + - scripts/ci/license-templates/LICENSE.txt - --fuzzy-match-generates-todo - id: insert-license name: Add license for all Markdown files @@ -142,7 +142,7 @@ repos: - --comment-style - "" - --license-filepath - - license-templates/LICENSE.txt + - scripts/ci/license-templates/LICENSE.txt - --fuzzy-match-generates-todo - id: insert-license name: Add license for all other files @@ -151,7 +151,7 @@ repos: - --comment-style - "|#|" - --license-filepath - - license-templates/LICENSE.txt + - scripts/ci/license-templates/LICENSE.txt - --fuzzy-match-generates-todo files: > \.cfg$|\.conf$|\.ini$|\.ldif$|\.properties$|\.readthedocs$|\.service$|\.tf$|Dockerfile.*$ diff --git a/license-templates/LICENSE.rst b/scripts/ci/license-templates/LICENSE.rst similarity index 100% rename from license-templates/LICENSE.rst rename to scripts/ci/license-templates/LICENSE.rst diff --git a/license-templates/LICENSE.txt b/scripts/ci/license-templates/LICENSE.txt similarity index 100% rename from license-templates/LICENSE.txt rename to scripts/ci/license-templates/LICENSE.txt diff --git a/scripts/ci/pre_commit/pre_commit_check_providers_subpackages_all_have_init.py 
b/scripts/ci/pre_commit/pre_commit_check_providers_subpackages_all_have_init.py index 0d491ad91fbb..f06425a92bce 100755 --- a/scripts/ci/pre_commit/pre_commit_check_providers_subpackages_all_have_init.py +++ b/scripts/ci/pre_commit/pre_commit_check_providers_subpackages_all_have_init.py @@ -33,7 +33,7 @@ def check_dir_init_file(provider_files: list[str]) -> None: missing_init_dirs.append(path) if missing_init_dirs: - with open(os.path.join(ROOT_DIR, "license-templates/LICENSE.txt")) as license: + with open(os.path.join(ROOT_DIR, "scripts/ci/license-templates/LICENSE.txt")) as license: license_txt = license.readlines() prefixed_licensed_txt = [f"# {line}" if line != "\n" else "#\n" for line in license_txt] From df352f37e748706cd18141252b85db8eaaeb3926 Mon Sep 17 00:00:00 2001 From: Hussein Awala Date: Sat, 19 Aug 2023 18:03:39 +0200 Subject: [PATCH 067/117] Stop adding values to rendered templates UI when there is no dagrun (#33516) (cherry picked from commit d9814eb3a2fc1dbbb885a0a2c1b7a23ce1cfa148) --- airflow/www/views.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/airflow/www/views.py b/airflow/www/views.py index 0be5226734b0..05a88115dd76 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -1446,6 +1446,8 @@ def rendered_templates(self, session): dag_run = dag.get_dagrun(execution_date=dttm, session=session) raw_task = dag.get_task(task_id).prepare_for_execution() + no_dagrun = False + title = "Rendered Template" html_dict = {} @@ -1458,6 +1460,7 @@ def rendered_templates(self, session): # database) for presentation only. ti = TaskInstance(raw_task, map_index=map_index) ti.dag_run = DagRun(dag_id=dag_id, execution_date=dttm) + no_dagrun = True else: ti = dag_run.get_task_instance(task_id=task_id, map_index=map_index, session=session) if ti: @@ -1503,28 +1506,33 @@ def rendered_templates(self, session): content = getattr(task, template_field) renderer = task.template_fields_renderers.get(template_field, template_field) if renderer in renderers: - html_dict[template_field] = renderers[renderer](content) + html_dict[template_field] = renderers[renderer](content) if not no_dagrun else "" else: - html_dict[template_field] = Markup("
<pre><code>{}</code></pre>").format(pformat(content)) + html_dict[template_field] = Markup("<pre><code>{}</code></pre>").format( + pformat(content) if not no_dagrun else "" + )
 if isinstance(content, dict): if template_field == "op_kwargs": for key, value in content.items(): renderer = task.template_fields_renderers.get(key, key) if renderer in renderers: - html_dict[".".join([template_field, key])] = renderers[renderer](value) + html_dict[".".join([template_field, key])] = ( + renderers[renderer](value) if not no_dagrun else "" + ) else: html_dict[".".join([template_field, key])] = Markup( "<pre><code>{}</code></pre>
" - ).format(pformat(value)) + ).format(pformat(value) if not no_dagrun else "") else: for dict_keys in get_key_paths(content): template_path = ".".join((template_field, dict_keys)) renderer = task.template_fields_renderers.get(template_path, template_path) if renderer in renderers: content_value = get_value_from_path(dict_keys, content) - html_dict[template_path] = renderers[renderer](content_value) - + html_dict[template_path] = ( + renderers[renderer](content_value) if not no_dagrun else "" + ) return self.render_template( "airflow/ti_code.html", show_trigger_form_if_no_params=conf.getboolean("webserver", "show_trigger_form_if_no_params"), From 42cfb9143fb77ca14d72ee7536490ae31e247fde Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sat, 19 Aug 2023 09:48:37 +0200 Subject: [PATCH 068/117] Fix typo in release notes (#33521) (cherry picked from commit e69aae1304b478b76d9088729ce6d4df5448e5c0) --- RELEASE_NOTES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index c3986432cfc2..fbd2ba786ef2 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -279,7 +279,7 @@ Bug Fixes - Adapt Notifier for sla_miss_callback (#31887) - Fix XCOM view (#31807) - Fix for "Filter dags by tag" flickering on initial load of dags.html (#31578) -- Fix where expanding ``resizer`` wouldn't expanse grid view (#31581) +- Fix where expanding ``resizer`` would not expanse grid view (#31581) - Fix MappedOperator-BaseOperator attr sync check (#31520) - Always pass named ``type_`` arg to drop_constraint (#31306) - Fix bad ``drop_constraint`` call in migrations (#31302) From 5101840c3ed8a44bb4b6168e5bf94de7927fec36 Mon Sep 17 00:00:00 2001 From: Andrey Anshin Date: Thu, 24 Aug 2023 12:58:29 +0400 Subject: [PATCH 069/117] Sort data before groupby in TIS duration calculation (#33535) (cherry picked from commit 79b8cfc0fa77f11491fc1de4d5f009e176aa7c3a) --- airflow/www/views.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/airflow/www/views.py b/airflow/www/views.py index 05a88115dd76..916a20941d90 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -3236,10 +3236,12 @@ def duration(self, dag_id: str, session: Session = NEW_SESSION): if failed_task_instance.duration: fails_totals[dict_key] += failed_task_instance.duration - # we must group any mapped TIs by dag_id, task_id, run_id + # We must group any mapped TIs by dag_id, task_id, run_id + def grouping_key(ti: TaskInstance): + return ti.dag_id, ti.task_id, ti.run_id + mapped_tis = set() - tis_grouped = itertools.groupby(task_instances, lambda x: (x.dag_id, x.task_id, x.run_id)) - for _, group in tis_grouped: + for _, group in itertools.groupby(sorted(task_instances, key=grouping_key), key=grouping_key): tis = list(group) duration = sum(x.duration for x in tis if x.duration) if duration: From d508a9a54b3628635c1b37249e536ba96879165a Mon Sep 17 00:00:00 2001 From: Vincent <97131062+vincbeck@users.noreply.github.com> Date: Tue, 22 Aug 2023 12:51:17 -0400 Subject: [PATCH 070/117] Add a fallback in case no first name and last name are set (#33617) (cherry picked from commit 62b917a6ac61fd6882c377e3b04f72d908f52a58) --- airflow/www/templates/appbuilder/navbar_right.html | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/airflow/www/templates/appbuilder/navbar_right.html b/airflow/www/templates/appbuilder/navbar_right.html index 8eec9f9fcf1e..1ccf28db418f 100644 --- a/airflow/www/templates/appbuilder/navbar_right.html +++ 
b/airflow/www/templates/appbuilder/navbar_right.html @@ -67,8 +67,13 @@