diff --git a/.gitattributes b/.gitattributes index 497db03fbcfc5..5f8117153f511 100644 --- a/.gitattributes +++ b/.gitattributes @@ -16,8 +16,6 @@ tests export-ignore Dockerfile.ci export-ignore ISSUE_TRIAGE_PROCESS.rst export-ignore -PULL_REQUEST_WORKFLOW.rst export-ignore -SELECTIVE_CHECKS.md export-ignore STATIC_CODE_CHECKS.rst export-ignore TESTING.rst export-ignore LOCAL_VIRTUALENV.rst export-ignore diff --git a/.github/actions/checks-action b/.github/actions/checks-action deleted file mode 160000 index 9f02872da71b6..0000000000000 --- a/.github/actions/checks-action +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9f02872da71b6f558c6a6f190f925dde5e4d8798 diff --git a/.github/actions/label-when-approved-action b/.github/actions/label-when-approved-action deleted file mode 160000 index 0058d0094da27..0000000000000 --- a/.github/actions/label-when-approved-action +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 0058d0094da27e116fad6e0da516ebe1107f26de diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index 9970a82e6c213..df53d8986c809 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -27,7 +27,6 @@ env: ANSWER: "yes" CHECK_IMAGE_FOR_REBUILD: "true" SKIP_CHECK_REMOTE_IMAGE: "true" - DEBIAN_VERSION: "bullseye" DB_RESET: "true" VERBOSE: "true" GITHUB_REPOSITORY: ${{ github.repository }} @@ -51,20 +50,22 @@ jobs: name: "Build Info" runs-on: ${{ github.repository == 'apache/airflow' && 'self-hosted' || 'ubuntu-20.04' }} env: - targetBranch: ${{ github.event.pull_request.base.ref }} + TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} outputs: - runsOn: ${{ github.repository == 'apache/airflow' && '["self-hosted"]' || '["ubuntu-20.04"]' }} - pythonVersions: "${{ steps.selective-checks.python-versions }}" - upgradeToNewerDependencies: ${{ steps.selective-checks.outputs.upgrade-to-newer-dependencies }} - allPythonVersions: ${{ steps.selective-checks.outputs.all-python-versions }} - defaultPythonVersion: ${{ steps.selective-checks.outputs.default-python-version }} + runs-on: ${{ github.repository == 'apache/airflow' && '["self-hosted"]' || '["ubuntu-20.04"]' }} + python-versions: "${{ steps.selective-checks.python-versions }}" + upgrade-to-newer-dependencies: ${{ steps.selective-checks.outputs.upgrade-to-newer-dependencies }} + all-python-versions-list-as-string: >- + ${{ steps.selective-checks.outputs.all-python-versions-list-as-string }} + default-python-version: ${{ steps.selective-checks.outputs.default-python-version }} run-tests: ${{ steps.selective-checks.outputs.run-tests }} run-kubernetes-tests: ${{ steps.selective-checks.outputs.run-kubernetes-tests }} - image-build: ${{ steps.dynamic-outputs.outputs.image-build }} - cacheDirective: ${{ steps.dynamic-outputs.outputs.cacheDirective }} - targetBranch: ${{ steps.dynamic-outputs.outputs.targetBranch }} - defaultBranch: ${{ steps.selective-checks.outputs.default-branch }} - targetCommitSha: "${{steps.discover-pr-merge-commit.outputs.targetCommitSha || + image-build: ${{ steps.selective-checks.outputs.image-build }} + cache-directive: ${{ steps.selective-checks.outputs.cache-directive }} + default-branch: ${{ steps.selective-checks.outputs.default-branch }} + default-constraints-branch: ${{ steps.selective-checks.outputs.default-constraints-branch }} + debian-version: ${{ steps.selective-checks.outputs.debian-version }} + target-commit-sha: "${{steps.discover-pr-merge-commit.outputs.target-commit-sha || github.event.pull_request.head.sha || github.sha }}" @@ 
-74,14 +75,14 @@ jobs: run: | TARGET_COMMIT_SHA="$(gh api '${{ github.event.pull_request.url }}' --jq .merge_commit_sha)" echo "TARGET_COMMIT_SHA=$TARGET_COMMIT_SHA" >> $GITHUB_ENV - echo "::set-output name=targetCommitSha::${TARGET_COMMIT_SHA}" + echo "::set-output name=target-commit-sha::${TARGET_COMMIT_SHA}" if: github.event_name == 'pull_request_target' # The labels in the event aren't updated when re-triggering the job, so let's hit the API to get # up-to-date values - name: Get latest PR labels id: get-latest-pr-labels run: | - echo -n "::set-output name=pullRequestLabels::" + echo -n "::set-output name=pull-request-labels::" gh api graphql --paginate -F node_id=${{github.event.pull_request.node_id}} -f query=' query($node_id: ID!, $endCursor: String) { node(id:$node_id) { @@ -103,112 +104,103 @@ jobs: ref: ${{ env.TARGET_COMMIT_SHA }} persist-credentials: false fetch-depth: 2 - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + - name: "Setup python" + uses: actions/setup-python@v4 + with: + # We do not have output from selective checks yet, so we need to hardcode python + python-version: 3.7 + cache: 'pip' + cache-dependency-path: ./dev/breeze/setup* + - name: "Retrieve defaults from branch_defaults.py" + # We cannot "execute" the branch_defaults.py python code here because that would be + # a security problem (we cannot run any code that comes from the sources coming from the PR). + # Therefore, we extract the branches via embedded Python code. + # We need to do it before the next step replaces checked-out breeze and scripts code coming from + # the PR, because the PR defaults have to be retrieved here. + id: defaults + run: | + python - <<EOF >>$GITHUB_ENV + from pathlib import Path + import re + import sys + + DEFAULTS_CONTENT = Path('dev/breeze/src/airflow_breeze/branch_defaults.py').read_text() + BRANCH_PATTERN = r'^AIRFLOW_BRANCH = "(.*)"$' + CONSTRAINTS_BRANCH_PATTERN = r'^DEFAULT_AIRFLOW_CONSTRAINTS_BRANCH = "(.*)"$' + DEBIAN_VERSION_PATTERN = r'^DEBIAN_VERSION = "(.*)"$' + + branch = re.search(BRANCH_PATTERN, DEFAULTS_CONTENT, re.MULTILINE).group(1) + constraints_branch = re.search(CONSTRAINTS_BRANCH_PATTERN, DEFAULTS_CONTENT, re.MULTILINE).group(1) + debian_version = re.search(DEBIAN_VERSION_PATTERN, DEFAULTS_CONTENT, re.MULTILINE).group(1) + + output = f""" + DEFAULT_BRANCH={branch} + DEFAULT_CONSTRAINTS_BRANCH={constraints_branch} + DEBIAN_VERSION={debian_version} + """.strip() + + print(output) + # Stdout is redirected to GITHUB_ENV but we also print it to stderr to see it in CI log + print(output, file=sys.stderr) + EOF + - name: Checkout main branch to 'main-airflow' folder to use breeze from there.
+ uses: actions/checkout@v3 with: persist-credentials: false submodules: recursive + - run: ./scripts/ci/install_breeze.sh - name: Selective checks id: selective-checks env: - PR_LABELS: ${{ steps.get-latest-pr-labels.outputs.pullRequestLabels }} - run: | - if [[ ${GITHUB_EVENT_NAME} == "pull_request_target" ]]; then - # Run selective checks - ./scripts/ci/selective_ci_checks.sh "${TARGET_COMMIT_SHA}" - else - # Run all checks - ./scripts/ci/selective_ci_checks.sh - fi - - name: Compute dynamic outputs - id: dynamic-outputs - run: | - set -x - if [[ "${{ github.event_name }}" == 'pull_request_target' ]]; then - echo "::set-output name=targetBranch::${targetBranch}" - else - # Direct push to branch, or scheduled build - echo "::set-output name=targetBranch::${GITHUB_REF#refs/heads/}" - fi - - if [[ "${{ github.event_name }}" == 'schedule' ]]; then - echo "::set-output name=cacheDirective::disabled" - else - echo "::set-output name=cacheDirective:registry" - fi - - if [[ "$SELECTIVE_CHECKS_IMAGE_BUILD" == "true" ]]; then - echo "::set-output name=image-build::true" - else - echo "::set-output name=image-build::false" - fi - env: - SELECTIVE_CHECKS_IMAGE_BUILD: ${{ steps.selective-checks.outputs.image-build }} + PR_LABELS: "$${{ steps.get-latest-pr-labels.outputs.pull-request-labels }}" + COMMIT_REF: "${{ env.TARGET_COMMIT_SHA }}" + run: breeze selective-check - name: env run: printenv env: - dynamicOutputs: ${{ toJSON(steps.dynamic-outputs.outputs) }} - PR_LABELS: ${{ steps.get-latest-pr-labels.outputs.pullRequestLabels }} + PR_LABELS: ${{ steps.get-latest-pr-labels.outputs.pull-request-labels }} GITHUB_CONTEXT: ${{ toJson(github) }} build-ci-images: permissions: packages: write timeout-minutes: 80 - name: "Build CI image ${{matrix.python-version}}" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + name: "Build CI images ${{ needs.build-info.outputs.all-python-versions-list-as-string }}" + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info] - strategy: - matrix: - python-version: ${{ fromJson(needs.build-info.outputs.allPythonVersions) }} - fail-fast: true if: | needs.build-info.outputs.image-build == 'true' && github.event.pull_request.head.repo.full_name != 'apache/airflow' env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn)[0] }} - PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} + DEFAULT_BRANCH: ${{ needs.build-info.outputs.default-branch }} + DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }} + DEBIAN_VERSION: ${{ needs.build-info.outputs.debian-version }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on)[0] }} BACKEND: sqlite outputs: ${{toJSON(needs.build-info.outputs) }} steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: - ref: ${{ needs.build-info.outputs.targetCommitSha }} + ref: ${{ needs.build-info.outputs.target-commit-sha }} persist-credentials: false submodules: recursive - - name: "Retrieve DEFAULTS from the _initialization.sh" - # We cannot "source" the script here because that would be a security problem (we cannot run - # any code that comes from the sources coming from the PR. 
Therefore, we extract the - # DEFAULT_BRANCH and DEFAULT_CONSTRAINTS_BRANCH and DEBIAN_VERSION via custom grep/awk/sed commands - id: defaults - run: | - DEFAULT_BRANCH=$(grep "export DEFAULT_BRANCH" scripts/ci/libraries/_initialization.sh | \ - awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') - echo "DEFAULT_BRANCH=${DEFAULT_BRANCH}" >> $GITHUB_ENV - DEFAULT_CONSTRAINTS_BRANCH=$(grep "export DEFAULT_CONSTRAINTS_BRANCH" \ - scripts/ci/libraries/_initialization.sh | \ - awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') - echo "DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH}" >> $GITHUB_ENV - DEBIAN_VERSION=$(grep "export DEBIAN_VERSION" scripts/ci/libraries/_initialization.sh | \ - awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') - echo "DEBIAN_VERSION=${DEBIAN_VERSION}" >> $GITHUB_ENV + - name: "Setup python" + uses: actions/setup-python@v4 + with: + python-version: ${{ needs.build-info.outputs.default-python-version }} - name: > - Checkout "${{ needs.build-info.outputs.targetBranch }}" branch to 'main-airflow' folder + Checkout "main" branch to 'main-airflow' folder + to use ci/scripts from there. - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: path: "main-airflow" - ref: "${{ needs.build-info.outputs.targetBranch }}" + ref: "main" persist-credentials: false submodules: recursive - - name: "Setup python" - uses: actions/setup-python@v2 - with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} - name: > - Override "scripts/ci" with the "${{ needs.build-info.outputs.targetBranch }}" branch + Override "scripts/ci" with the "main" branch so that the PR does not override it # We should not override those scripts which become part of the image as they will not be # changed in the image built - we should only override those that are executed to build @@ -218,28 +210,45 @@ jobs: rm -rfv "dev" mv -v "main-airflow/scripts/ci" "scripts" mv -v "main-airflow/dev" "."
- - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh - name: "Free space" run: breeze free-space - - name: Build & Push CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} - run: breeze build-image --push-image --tag-as-latest + - name: Cache pre-commit envs + uses: actions/cache@v3 + with: + path: ~/.cache/pre-commit + key: "pre-commit-${{steps.host-python-version.outputs.host-python-version}}-\ +${{ hashFiles('.pre-commit-config.yaml') }}" + restore-keys: pre-commit-${{steps.host-python-version.outputs.host-python-version}} + - name: "Regenerate dependencies in case they was modified manually so that we can build an image" + run: > + breeze static-checks --type update-providers-dependencies --all-files + --show-diff-on-failure --color always || true + if: needs.build-info.outputs.default-branch == 'main' + - name: >- + Build & Push AMD64 CI images ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + ${{ needs.build-info.outputs.all-python-versions-list-as-string }} + run: breeze build-image --push-image --tag-as-latest --run-in-parallel env: - UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} - DOCKER_CACHE: ${{ needs.build-info.outputs.cacheDirective }} + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + PYTHON_VERSIONS: ${{ needs.build-info.outputs.all-python-versions-list-as-string }} - name: Push empty CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} if: failure() || cancelled() - run: breeze build-image --push-image --empty-image + run: breeze build-image --push-image --empty-image --run-in-parallel env: IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - - name: "Candidates for pip resolver backtrack triggers: ${{ matrix.python-version }}" + - name: "Candidates for pip resolver backtrack triggers" if: failure() || cancelled() - run: breeze find-newer-dependencies --max-age 1 --python "${{ matrix.python-version }}" + run: > + breeze find-newer-dependencies --max-age 1 + --python "${{ needs.build-info.outputs.default-python-version }}" - name: "Fix ownership" run: breeze fix-ownership if: always() @@ -248,59 +257,41 @@ jobs: permissions: packages: write timeout-minutes: 80 - name: "Build PROD image ${{matrix.python-version}}" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + name: "Build PROD images ${{ needs.build-info.outputs.all-python-versions-list-as-string }}" + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, build-ci-images] - strategy: - matrix: - python-version: ${{ fromJson(needs.build-info.outputs.allPythonVersions) }} - fail-fast: true if: | needs.build-info.outputs.image-build == 'true' && github.event.pull_request.head.repo.full_name != 'apache/airflow' env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn)[0] }} - PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} + DEFAULT_BRANCH: ${{ needs.build-info.outputs.default-branch }} + DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }} + DEBIAN_VERSION: ${{ needs.build-info.outputs.debian-version }} + RUNS_ON: ${{ 
fromJson(needs.build-info.outputs.runs-on)[0] }} BACKEND: sqlite steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: - ref: ${{ needs.build-info.outputs.targetCommitSha }} + ref: ${{ needs.build-info.outputs.target-commit-sha }} persist-credentials: false submodules: recursive - - name: "Retrieve DEFAULTS from the _initialization.sh" - # We cannot "source" the script here because that would be a security problem (we cannot run - # any code that comes from the sources coming from the PR. Therefore we extract the - # DEFAULT_BRANCH and DEFAULT_CONSTRAINTS_BRANCH and DEBIAN_VERSION via custom grep/awk/sed commands - id: defaults - run: | - DEFAULT_BRANCH=$(grep "export DEFAULT_BRANCH" scripts/ci/libraries/_initialization.sh | \ - awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') - echo "DEFAULT_BRANCH=${DEFAULT_BRANCH}" >> $GITHUB_ENV - DEFAULT_CONSTRAINTS_BRANCH=$(grep "export DEFAULT_CONSTRAINTS_BRANCH" \ - scripts/ci/libraries/_initialization.sh | \ - awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') - echo "DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH}" >> $GITHUB_ENV - DEBIAN_VERSION=$(grep "export DEBIAN_VERSION" scripts/ci/libraries/_initialization.sh | \ - cut -d "=" -f 3 | sed s'/["}]//g') - echo "DEBIAN_VERSION=${DEBIAN_VERSION}" >> $GITHUB_ENV + - name: "Setup python" + uses: actions/setup-python@v4 + with: + python-version: ${{ needs.build-info.outputs.default-python-version }} - name: > - Checkout "${{ needs.build-info.outputs.targetBranch }}" branch to 'main-airflow' folder + Checkout "main" branch to 'main-airflow' folder to use ci/scripts from there. - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: path: "main-airflow" - ref: "${{ needs.build-info.outputs.targetBranch }}" + ref: "main" persist-credentials: false submodules: recursive - - name: "Setup python" - uses: actions/setup-python@v2 - with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} - name: > - Override "scripts/ci" with the "${{ needs.build-info.outputs.targetBranch }}" branch + Override "scripts/ci" with the "main" branch so that the PR does not override it # We should not override those scripts which become part of the image as they will not be # changed in the image built - we should only override those that are executed to build @@ -310,21 +301,34 @@ jobs: rm -rfv "dev" mv -v "main-airflow/scripts/ci" "scripts" mv -v "main-airflow/dev" "." 
- - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh - name: "Free space" run: breeze free-space + - name: Cache pre-commit envs + uses: actions/cache@v3 + with: + path: ~/.cache/pre-commit + key: "pre-commit-${{steps.host-python-version.outputs.host-python-version}}-\ +${{ hashFiles('.pre-commit-config.yaml') }}" + restore-keys: pre-commit-${{steps.host-python-version.outputs.host-python-version}} + if: needs.build-info.outputs.default-branch == 'main' + - name: "Regenerate dependencies in case they was modified manually so that we can build an image" + run: > + breeze static-checks --type update-providers-dependencies --all-files + --show-diff-on-failure --color always || true + if: needs.build-info.outputs.default-branch == 'main' - name: > Pull CI image for PROD build: - ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + ${{ needs.build-info.outputs.default-python-version }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} run: breeze pull-image --tag-as-latest env: # Always use default Python version of CI image for preparing packages - PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.default-python-version }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Cleanup dist and context file" run: rm -fv ./dist/* ./docker-context-files/* @@ -338,23 +342,99 @@ jobs: run: breeze prepare-airflow-package --package-format wheel --version-suffix-for-pypi dev0 - name: "Move dist packages to docker-context files" run: mv -v ./dist/*.whl ./docker-context-files - - name: Build & Push PROD image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: >- + Build & Push PROD images ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + ${{ needs.build-info.outputs.all-python-versions-list-as-string }} run: > breeze build-prod-image + --run-in-parallel --tag-as-latest --push-image --install-packages-from-context --disable-airflow-repo-cache --airflow-is-in-context env: - UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} - DOCKER_CACHE: ${{ needs.build-info.outputs.cacheDirective }} + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - - name: Push empty PROD image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + PYTHON_VERSIONS: ${{ needs.build-info.outputs.all-python-versions-list-as-string }} + - name: Push empty PROD images ${{ env.IMAGE_TAG_FOR_THE_BUILD }} if: failure() || cancelled() - run: breeze build-prod-image --cleanup-context --push-image --empty-image + run: breeze build-prod-image --cleanup-context --push-image --empty-image --run-in-parallel + env: + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + - name: "Fix ownership" + run: breeze fix-ownership + if: always() + + + build-ci-images-arm: + timeout-minutes: 120 + name: "Build ARM CI images ${{ needs.build-info.outputs.all-python-versions-list-as-string }}" + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} + needs: [build-info, build-prod-images] + if: | + needs.build-info.outputs.image-build == 'true' && + 
needs.build-info.outputs.upgrade-to-newer-dependencies != 'false' && + github.event.pull_request.head.repo.full_name != 'apache/airflow' + env: + DEFAULT_BRANCH: ${{ needs.build-info.outputs.default-branch }} + DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }} + DEBIAN_VERSION: ${{ needs.build-info.outputs.debian-version }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on)[0] }} + BACKEND: sqlite + outputs: ${{toJSON(needs.build-info.outputs) }} + steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + - uses: actions/checkout@v2 + with: + ref: ${{ needs.build-info.outputs.target-commit-sha }} + persist-credentials: false + submodules: recursive + - name: > + Checkout "main" branch to 'main-airflow' folder + to use ci/scripts from there. + uses: actions/checkout@v2 + with: + path: "main-airflow" + ref: "main" + persist-credentials: false + submodules: recursive + - name: > + Override "scripts/ci" with the "main" branch + so that the PR does not override it + # We should not override those scripts which become part of the image as they will not be + # changed in the image built - we should only override those that are executed to build + # the image. + run: | + rm -rfv "scripts/ci" + rm -rfv "dev" + mv -v "main-airflow/scripts/ci" "scripts" + mv -v "main-airflow/dev" "." + - name: "Setup python" + uses: actions/setup-python@v2 + with: + python-version: ${{ needs.build-info.outputs.default-python-version }} + - run: ./scripts/ci/install_breeze.sh + - name: "Free space" + run: breeze free-space + - name: "Start ARM instance" + run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh + - name: > + Build ARM CI images ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + ${{ needs.build-info.outputs.all-python-versions-list-as-string }} + run: > + breeze build-image --run-in-parallel --parallelism 1 + --builder airflow_cache --platform "linux/arm64" env: + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + PYTHON_VERSIONS: ${{ needs.build-info.outputs.all-python-versions-list-as-string }} + - name: "Stop ARM instance" + run: ./scripts/ci/images/ci_stop_arm_instance.sh + if: always() - name: "Fix ownership" run: breeze fix-ownership if: always() diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 05781d33d7cc2..33cbb0b7515a2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -80,7 +80,6 @@ jobs: "XD-DENG", "aijamalnk", "alexvanboxel", - "aneesh-joseph", "aoen", "artwr", "ashb", @@ -107,6 +106,7 @@ jobs: "milton0825", "mistercrunch", "msumit", + "pingzh", "potiuk", "r39132", "ryanahamilton", @@ -125,33 +125,28 @@ jobs: env: GITHUB_CONTEXT: ${{ toJson(github) }} outputs: - defaultBranch: ${{ steps.selective-checks.outputs.default-branch }} - cacheDirective: ${{ steps.dynamic-outputs.outputs.cacheDirective }} - waitForImage: ${{ steps.wait-for-image.outputs.wait-for-image }} - allPythonVersions: ${{ steps.selective-checks.outputs.all-python-versions }} - upgradeToNewerDependencies: ${{ steps.selective-checks.outputs.upgrade-to-newer-dependencies }} - pythonVersions: ${{ steps.selective-checks.outputs.python-versions }} - pythonVersionsListAsString: ${{ steps.selective-checks.outputs.python-versions-list-as-string }} - defaultPythonVersion: ${{ steps.selective-checks.outputs.default-python-version }} 
- kubernetesVersions: ${{ steps.selective-checks.outputs.kubernetes-versions }} - kubernetesVersionsListAsString: ${{ steps.selective-checks.outputs.kubernetes-versions-list-as-string }} - defaultKubernetesVersion: ${{ steps.selective-checks.outputs.default-kubernetes-version }} - kubernetesModes: ${{ steps.selective-checks.outputs.kubernetes-modes }} - defaultKubernetesMode: ${{ steps.selective-checks.outputs.default-kubernetes-mode }} - postgresVersions: ${{ steps.selective-checks.outputs.postgres-versions }} - defaultPostgresVersion: ${{ steps.selective-checks.outputs.default-postgres-version }} - mysqlVersions: ${{ steps.selective-checks.outputs.mysql-versions }} - mssqlVersions: ${{ steps.selective-checks.outputs.mssql-versions }} - defaultMySQLVersion: ${{ steps.selective-checks.outputs.default-mysql-version }} - helmVersions: ${{ steps.selective-checks.outputs.helm-versions }} - defaultHelmVersion: ${{ steps.selective-checks.outputs.default-helm-version }} - kindVersions: ${{ steps.selective-checks.outputs.kind-versions }} - defaultKindVersion: ${{ steps.selective-checks.outputs.default-kind-version }} - testTypes: ${{ steps.selective-checks.outputs.test-types }} - postgresExclude: ${{ steps.selective-checks.outputs.postgres-exclude }} - mysqlExclude: ${{ steps.selective-checks.outputs.mysql-exclude }} - mssqlExclude: ${{ steps.selective-checks.outputs.mssql-exclude }} - sqliteExclude: ${{ steps.selective-checks.outputs.sqlite-exclude }} + debian-version: ${{ steps.selective-checks.outputs.debian-version }} + cache-directive: ${{ steps.selective-checks.outputs.cache-directive }} + upgrade-to-newer-dependencies: ${{ steps.selective-checks.outputs.upgrade-to-newer-dependencies }} + python-versions: ${{ steps.selective-checks.outputs.python-versions }} + python-versions-list-as-string: ${{ steps.selective-checks.outputs.python-versions-list-as-string }} + all-python-versions-list-as-string: >- + ${{ steps.selective-checks.outputs.all-python-versions-list-as-string }} + default-python-version: ${{ steps.selective-checks.outputs.default-python-version }} + kubernetes-versions-list-as-string: >- + ${{ steps.selective-checks.outputs.kubernetes-versions-list-as-string }} + postgres-versions: ${{ steps.selective-checks.outputs.postgres-versions }} + default-postgres-version: ${{ steps.selective-checks.outputs.default-postgres-version }} + mysql-versions: ${{ steps.selective-checks.outputs.mysql-versions }} + mssql-versions: ${{ steps.selective-checks.outputs.mssql-versions }} + default-mysql-version: ${{ steps.selective-checks.outputs.default-mysql-version }} + default-helm-version: ${{ steps.selective-checks.outputs.default-helm-version }} + default-kind-version: ${{ steps.selective-checks.outputs.default-kind-version }} + test-types: ${{ steps.selective-checks.outputs.test-types }} + postgres-exclude: ${{ steps.selective-checks.outputs.postgres-exclude }} + mysql-exclude: ${{ steps.selective-checks.outputs.mysql-exclude }} + mssql-exclude: ${{ steps.selective-checks.outputs.mssql-exclude }} + sqlite-exclude: ${{ steps.selective-checks.outputs.sqlite-exclude }} run-tests: ${{ steps.selective-checks.outputs.run-tests }} run-ui-tests: ${{ steps.selective-checks.outputs.run-ui-tests }} run-www-tests: ${{ steps.selective-checks.outputs.run-www-tests }} @@ -163,19 +158,21 @@ jobs: needs-api-tests: ${{ steps.selective-checks.outputs.needs-api-tests }} needs-api-codegen: ${{ steps.selective-checks.outputs.needs-api-codegen }} default-branch: ${{ steps.selective-checks.outputs.default-branch }} - 
sourceHeadRepo: ${{ steps.source-run-info.outputs.sourceHeadRepo }} - pullRequestNumber: ${{ steps.source-run-info.outputs.pullRequestNumber }} - pullRequestLabels: ${{ steps.source-run-info.outputs.pullRequestLabels }} - runsOn: ${{ steps.set-runs-on.outputs.runsOn }} - runCoverage: ${{ steps.set-run-coverage.outputs.runCoverage }} - inWorkflowBuild: ${{ steps.set-in-workflow-build.outputs.inWorkflowBuild }} - buildJobDescription: ${{ steps.set-in-workflow-build.outputs.buildJobDescription }} - mergeRun: ${{ steps.set-merge-run.outputs.merge-run }} + default-constraints-branch: ${{ steps.selective-checks.outputs.default-constraints-branch }} + docs-filter: ${{ steps.selective-checks.outputs.docs-filter }} + skip-pre-commits: ${{ steps.selective-checks.outputs.skip-pre-commits }} + source-head-repo: ${{ steps.source-run-info.outputs.source-head-repo }} + pull-request-labels: ${{ steps.source-run-info.outputs.pullRequestLabels }} + runs-on: ${{ steps.set-runs-on.outputs.runs-on }} + run-coverage: ${{ steps.set-run-coverage.outputs.run-coverage }} + in-workflow-build: ${{ steps.set-in-workflow-build.outputs.in-workflow-build }} + build-job-description: ${{ steps.set-in-workflow-build.outputs.build-job-description }} + merge-run: ${{ steps.set-merge-run.outputs.merge-run }} steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false submodules: recursive @@ -185,46 +182,78 @@ jobs: with: token: ${{ secrets.GITHUB_TOKEN }} - name: Fetch incoming commit ${{ github.sha }} with its parent - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: ref: ${{ github.sha }} fetch-depth: 2 persist-credentials: false - if: github.event_name == 'pull_request' + - name: "Setup python" + uses: actions/setup-python@v4 + with: + # We do not have output from selective checks yet, so we need to hardcode python + python-version: 3.7 + cache: 'pip' + cache-dependency-path: ./dev/breeze/setup* + - name: "Retrieve defaults from branch_defaults.py" + id: defaults + # We could retrieve it differently here - by just importing the variables and + # printing them from python code, however we want to have the same code as used in + # the build-images.yml (there we cannot import python code coming from the PR - we need to + # treat the python code as text and extract the variables from there. 
+ run: | + python - <<EOF >>$GITHUB_ENV + from pathlib import Path + import re + import sys + + DEFAULTS_CONTENT = Path('dev/breeze/src/airflow_breeze/branch_defaults.py').read_text() + BRANCH_PATTERN = r'^AIRFLOW_BRANCH = "(.*)"$' + CONSTRAINTS_BRANCH_PATTERN = r'^DEFAULT_AIRFLOW_CONSTRAINTS_BRANCH = "(.*)"$' + DEBIAN_VERSION_PATTERN = r'^DEBIAN_VERSION = "(.*)"$' + + branch = re.search(BRANCH_PATTERN, DEFAULTS_CONTENT, re.MULTILINE).group(1) + constraints_branch = re.search(CONSTRAINTS_BRANCH_PATTERN, DEFAULTS_CONTENT, re.MULTILINE).group(1) + debian_version = re.search(DEBIAN_VERSION_PATTERN, DEFAULTS_CONTENT, re.MULTILINE).group(1) + + output = f""" + DEFAULT_BRANCH={branch} + DEFAULT_CONSTRAINTS_BRANCH={constraints_branch} + DEBIAN_VERSION={debian_version} + """.strip() + + print(output) + # Stdout is redirected to GITHUB_ENV but we also print it to stderr to see it in CI log + print(output, file=sys.stderr) + EOF + - run: ./scripts/ci/install_breeze.sh - name: Selective checks id: selective-checks env: - PR_LABELS: "${{ steps.source-run-info.outputs.pullRequestLabels }}" - run: | - if [[ ${GITHUB_EVENT_NAME} == "pull_request" ]]; then - # Run selective checks - ./scripts/ci/selective_ci_checks.sh "${GITHUB_SHA}" - else - # Run all checks - ./scripts/ci/selective_ci_checks.sh - fi + PR_LABELS: "${{ steps.source-run-info.outputs.pull-request-labels }}" + COMMIT_REF: "${{ github.sha }}" + run: breeze selective-check # Avoid having to specify the runs-on logic every time. We use the custom # env var AIRFLOW_SELF_HOSTED_RUNNER set only on our runners, but never # on the public runners - name: Set runs-on id: set-runs-on env: - PR_LABELS: "${{ steps.source-run-info.outputs.pullRequestLabels }}" + PR_LABELS: "${{ steps.source-run-info.outputs.pull-request-labels }}" run: | if [[ ${PR_LABELS=} == *"use public runners"* ]]; then echo "Forcing running on Public Runners via `use public runners` label" - echo "::set-output name=runsOn::\"ubuntu-20.04\"" + echo "::set-output name=runs-on::\"ubuntu-20.04\"" elif [[ ${AIRFLOW_SELF_HOSTED_RUNNER} == "" ]]; then echo "Regular PR running with Public Runner" - echo "::set-output name=runsOn::\"ubuntu-20.04\"" + echo "::set-output name=runs-on::\"ubuntu-20.04\"" else echo "Maintainer or main run running with self-hosted runner" - echo "::set-output name=runsOn::\"self-hosted\"" + echo "::set-output name=runs-on::\"self-hosted\"" fi # Avoid having to specify the coverage logic every time.
- name: Set run coverage id: set-run-coverage - run: echo "::set-output name=runCoverage::true" + run: echo "::set-output name=run-coverage::true" if: > github.ref == 'refs/heads/main' && github.repository == 'apache/airflow' && github.event_name == 'push' && @@ -240,12 +269,12 @@ jobs: if [[ ${GITHUB_EVENT_NAME} == "push" || ${GITHUB_EVENT_NAME} == "push" || \ ${{steps.source-run-info.outputs.sourceHeadRepo}} == "apache/airflow" ]]; then echo "Images will be built in current workflow" - echo "::set-output name=inWorkflowBuild::true" - echo "::set-output name=buildJobDescription::Build" + echo "::set-output name=in-workflow-build::true" + echo "::set-output name=build-job-description::Build" else echo "Images will be built in pull_request_target workflow" - echo "::set-output name=inWorkflowBuild::false" - echo "::set-output name=buildJobDescription::Skip Build (pull_request_target)" + echo "::set-output name=in-workflow-build::false" + echo "::set-output name=build-job-description::Skip Build (pull_request_target)" fi - name: Determine if this is merge run id: set-merge-run @@ -257,204 +286,195 @@ jobs: github.ref_name == 'main' || startsWith(github.ref_name, 'v2') && endsWith(github.ref_name, 'test') ) - - name: Compute dynamic outputs - id: dynamic-outputs - run: | - set -x - if [[ "${{ github.event_name }}" == 'schedule' ]]; then - echo "::set-output name=cacheDirective::disabled" - else - echo "::set-output name=cacheDirective::registry" - fi - - if [[ "$SELECTIVE_CHECKS_IMAGE_BUILD" == "true" ]]; then - echo "::set-output name=image-build::true" - else - echo "::set-output name=image-build::false" - fi - env: - SELECTIVE_CHECKS_IMAGE_BUILD: ${{ steps.selective-checks.outputs.image-build }} - name: env run: printenv env: - dynamicOutputs: ${{ toJSON(steps.dynamic-outputs.outputs) }} - PR_LABELS: ${{ steps.get-latest-pr-labels.outputs.pullRequestLabels }} + PR_LABELS: ${{ steps.get-latest-pr-labels.outputs.pull-request-labels }} GITHUB_CONTEXT: ${{ toJson(github) }} build-ci-images: permissions: packages: write timeout-minutes: 80 - name: "${{needs.build-info.outputs.buildJobDescription}} CI image ${{matrix.python-version}}" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + name: >- + ${{needs.build-info.outputs.build-job-description}} CI images + ${{ needs.build-info.outputs.all-python-versions-list-as-string }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info] - strategy: - matrix: - python-version: ${{ fromJson(needs.build-info.outputs.allPythonVersions) }} - fail-fast: true env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn)[0] }} + DEFAULT_BRANCH: ${{ needs.build-info.outputs.default-branch }} + DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }} + DEBIAN_VERSION: ${{ needs.build-info.outputs.debian-version }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on)[0] }} steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: needs.build-info.outputs.inWorkflowBuild == 'true' - - uses: actions/checkout@v2 + if: needs.build-info.outputs.in-workflow-build == 'true' + - uses: actions/checkout@v3 with: ref: ${{ needs.build-info.outputs.targetCommitSha }} persist-credentials: false submodules: recursive - if: needs.build-info.outputs.inWorkflowBuild == 'true' + if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ 
needs.build-info.outputs.defaultPythonVersion }} - if: needs.build-info.outputs.inWorkflowBuild == 'true' - - name: "Retrieve DEFAULTS from the _initialization.sh" - # We cannot "source" the script here because that would be a security problem (we cannot run - # any code that comes from the sources coming from the PR. Therefore we extract the - # DEFAULT_BRANCH and DEFAULT_CONSTRAINTS_BRANCH and DEBIAN_VERSION via custom grep/awk/sed commands - id: defaults - run: | - DEFAULT_BRANCH=$(grep "export DEFAULT_BRANCH" scripts/ci/libraries/_initialization.sh | \ - awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') - echo "DEFAULT_BRANCH=${DEFAULT_BRANCH}" >> $GITHUB_ENV - DEFAULT_CONSTRAINTS_BRANCH=$(grep "export DEFAULT_CONSTRAINTS_BRANCH" \ - scripts/ci/libraries/_initialization.sh | \ - awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') - echo "DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH}" >> $GITHUB_ENV - DEBIAN_VERSION=$(grep "export DEBIAN_VERSION" scripts/ci/libraries/_initialization.sh | \ - awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') - echo "DEBIAN_VERSION=${DEBIAN_VERSION}" >> $GITHUB_ENV - if: needs.build-info.outputs.inWorkflowBuild == 'true' + python-version: ${{ needs.build-info.outputs.default-python-version }} + if: needs.build-info.outputs.in-workflow-build == 'true' - run: ./scripts/ci/install_breeze.sh - if: needs.build-info.outputs.inWorkflowBuild == 'true' + if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Free space" run: breeze free-space - if: needs.build-info.outputs.inWorkflowBuild == 'true' - - name: Build & Push CI image ${{ matrix.python-version }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} - run: breeze build-image --push-image --tag-as-latest + if: needs.build-info.outputs.in-workflow-build == 'true' + - name: Cache pre-commit envs + uses: actions/cache@v3 + with: + path: ~/.cache/pre-commit + key: "pre-commit-${{steps.host-python-version.outputs.host-python-version}}-\ +${{ hashFiles('.pre-commit-config.yaml') }}" + restore-keys: pre-commit-${{steps.host-python-version.outputs.host-python-version}} + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' + - name: "Regenerate dependencies in case they was modified manually so that we can build an image" + run: > + breeze static-checks --type update-providers-dependencies --all-files + --show-diff-on-failure --color always || true + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' + - name: > + Build & Push CI images ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + ${{ needs.build-info.outputs.all-python-versions-list-as-string }} + run: breeze build-image --push-image --tag-as-latest --run-in-parallel env: - PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} - UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} - DOCKER_CACHE: ${{ needs.build-info.outputs.cacheDirective }} + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - if: needs.build-info.outputs.inWorkflowBuild == 'true' - - name: "Candidates for pip resolver backtrack triggers: ${{ matrix.python-version }}" + PYTHON_VERSIONS: ${{ needs.build-info.outputs.all-python-versions-list-as-string }} + if: needs.build-info.outputs.in-workflow-build == 'true' + - name: "Candidates for pip resolver backtrack triggers" if: failure() 
|| cancelled() - run: breeze find-newer-dependencies --max-age 1 --python "${{ matrix.python-version }}" + run: > + breeze find-newer-dependencies --max-age 1 + --python "${{ needs.build-info.outputs.default-python-version }}" - name: "Fix ownership" run: breeze fix-ownership - if: always() && needs.build-info.outputs.inWorkflowBuild == 'true' + if: always() && needs.build-info.outputs.in-workflow-build == 'true' build-prod-images: permissions: packages: write timeout-minutes: 80 - name: "${{needs.build-info.outputs.buildJobDescription}} PROD image ${{matrix.python-version}}" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + name: > + ${{needs.build-info.outputs.build-job-description}} PROD images + ${{ needs.build-info.outputs.all-python-versions-list-as-string }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, build-ci-images] - strategy: - matrix: - python-version: ${{ fromJson(needs.build-info.outputs.allPythonVersions) }} - fail-fast: true env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn)[0] }} + DEFAULT_BRANCH: ${{ needs.build-info.outputs.default-branch }} + DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }} + DEBIAN_VERSION: ${{ needs.build-info.outputs.debian-version }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on)[0] }} BACKEND: sqlite - PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} - DOCKER_CACHE: ${{ needs.build-info.outputs.cacheDirective }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} VERSION_SUFFIX_FOR_PYPI: "dev0" steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: needs.build-info.outputs.inWorkflowBuild == 'true' - - uses: actions/checkout@v2 + if: needs.build-info.outputs.in-workflow-build == 'true' + - uses: actions/checkout@v3 with: ref: ${{ needs.build-info.outputs.targetCommitSha }} persist-credentials: false submodules: recursive - if: needs.build-info.outputs.inWorkflowBuild == 'true' + if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} - if: needs.build-info.outputs.inWorkflowBuild == 'true' - - name: "Retrieve DEFAULTS from the _initialization.sh" - # We cannot "source" the script here because that would be a security problem (we cannot run - # any code that comes from the sources coming from the PR. 
Therefore we extract the - # DEFAULT_BRANCH and DEFAULT_CONSTRAINTS_BRANCH and DEBIAN_VERSION via custom grep/awk/sed commands - id: defaults - run: | - DEFAULT_BRANCH=$(grep "export DEFAULT_BRANCH" scripts/ci/libraries/_initialization.sh | \ - awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') - echo "DEFAULT_BRANCH=${DEFAULT_BRANCH}" >> $GITHUB_ENV - DEFAULT_CONSTRAINTS_BRANCH=$(grep "export DEFAULT_CONSTRAINTS_BRANCH" \ - scripts/ci/libraries/_initialization.sh | \ - awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') - echo "DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH}" >> $GITHUB_ENV - DEBIAN_VERSION=$(grep "export DEBIAN_VERSION" scripts/ci/libraries/_initialization.sh | \ - awk 'BEGIN{FS="="} {print $3}' | sed s'/["}]//g') - echo "DEBIAN_VERSION=${DEBIAN_VERSION}" >> $GITHUB_ENV - if: needs.build-info.outputs.inWorkflowBuild == 'true' + python-version: ${{ needs.build-info.outputs.default-python-version }} + if: needs.build-info.outputs.in-workflow-build == 'true' - run: ./scripts/ci/install_breeze.sh - if: needs.build-info.outputs.inWorkflowBuild == 'true' + if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Free space" run: breeze free-space - if: needs.build-info.outputs.inWorkflowBuild == 'true' + if: needs.build-info.outputs.in-workflow-build == 'true' + - name: Cache pre-commit envs + uses: actions/cache@v3 + with: + path: ~/.cache/pre-commit + key: "pre-commit-${{steps.host-python-version.outputs.host-python-version}}-\ +${{ hashFiles('.pre-commit-config.yaml') }}" + restore-keys: pre-commit-${{steps.host-python-version.outputs.host-python-version}} + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' + - name: "Regenerate dependencies in case they was modified manually so that we can build an image" + run: > + breeze static-checks --type update-providers-dependencies --all-files + --show-diff-on-failure --color always || true + if: > + needs.build-info.outputs.in-workflow-build == 'true' && + needs.build-info.outputs.default-branch == 'main' - name: > Pull CI image for PROD build: - ${{ needs.build-info.outputs.defaultPythonVersion }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }}" + ${{ needs.build-info.outputs.default-python-version }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }}" run: breeze pull-image --tag-as-latest env: # Always use default Python version of CI image for preparing packages - PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.default-python-version }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - if: needs.build-info.outputs.inWorkflowBuild == 'true' + if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Cleanup dist and context file" run: rm -fv ./dist/* ./docker-context-files/* - if: needs.build-info.outputs.inWorkflowBuild == 'true' + if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Prepare providers packages" run: > breeze prepare-provider-packages --package-list-file ./scripts/ci/installed_providers.txt --package-format wheel --version-suffix-for-pypi dev0 - if: needs.build-info.outputs.inWorkflowBuild == 'true' + if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Prepare airflow package" run: breeze prepare-airflow-package --package-format wheel --version-suffix-for-pypi dev0 - if: needs.build-info.outputs.inWorkflowBuild == 'true' + if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Move dist packages to docker-context files" run: 
mv -v ./dist/*.whl ./docker-context-files - if: needs.build-info.outputs.inWorkflowBuild == 'true' - - name: Build & Push PROD image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} + if: needs.build-info.outputs.in-workflow-build == 'true' + - name: > + Build & Push PROD images ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + ${{ needs.build-info.outputs.all-python-versions-list-as-string }} run: > breeze build-prod-image --tag-as-latest + --run-in-parallel --push-image --install-packages-from-context --disable-airflow-repo-cache --airflow-is-in-context env: - UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgradeToNewerDependencies }} - DOCKER_CACHE: ${{ needs.build-info.outputs.cacheDirective }} + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - if: needs.build-info.outputs.inWorkflowBuild == 'true' + PYTHON_VERSIONS: ${{ needs.build-info.outputs.all-python-versions-list-as-string }} + if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Fix ownership" run: breeze fix-ownership - if: always() && needs.build-info.outputs.inWorkflowBuild == 'true' + if: always() && needs.build-info.outputs.in-workflow-build == 'true' run-new-breeze-tests: timeout-minutes: 10 name: Breeze unit tests - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info] steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: persist-credentials: false - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: python -m pip install --editable ./dev/breeze/ @@ -464,18 +484,18 @@ jobs: tests-ui: timeout-minutes: 10 name: React UI tests - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info] if: needs.build-info.outputs.run-ui-tests == 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup node" - uses: actions/setup-node@v2 + uses: actions/setup-node@v3 with: node-version: 14 - name: "Cache eslint" @@ -491,22 +511,22 @@ jobs: tests-www: timeout-minutes: 10 name: React WWW tests - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info] if: needs.build-info.outputs.run-www-tests == 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup node" - uses: actions/setup-node@v2 + uses: actions/setup-node@v3 with: node-version: 14 - name: "Cache eslint" - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: 'airflow/www/node_modules' key: ${{ runner.os }}-ui-node-modules-${{ 
hashFiles('airflow/ui/**/yarn.lock') }} @@ -519,14 +539,14 @@ jobs: test-openapi-client-generation: timeout-minutes: 10 name: "Test OpenAPI client generation" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info] if: needs.build-info.outputs.needs-api-codegen == 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 2 persist-credentials: false @@ -536,7 +556,7 @@ jobs: test-examples-of-prod-image-building: timeout-minutes: 60 name: "Test examples of production image building" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info] if: needs.build-info.outputs.image-build == 'true' steps: @@ -548,9 +568,9 @@ jobs: fetch-depth: 2 persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/requirements.txt - name: "Test examples of PROD image building" @@ -561,23 +581,23 @@ jobs: wait-for-ci-images: timeout-minutes: 120 name: "Wait for CI images" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, build-ci-images] if: needs.build-info.outputs.image-build == 'true' env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} BACKEND: sqlite steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -587,7 +607,7 @@ jobs: id: wait-for-images run: breeze pull-image --run-in-parallel --verify-image --wait-for-image --tag-as-latest env: - PYTHON_VERSIONS: ${{ needs.build-info.outputs.pythonVersionsListAsString }} + PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Fix ownership" run: breeze fix-ownership @@ -596,27 +616,27 @@ jobs: static-checks: timeout-minutes: 30 name: "Static checks" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-ci-images] env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.default-python-version }} if: needs.build-info.outputs.basic-checks-only == 'false' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout 
${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - name: Cache pre-commit envs - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.cache/pre-commit key: "pre-commit-${{steps.host-python-version.outputs.host-python-version}}-\ @@ -638,7 +658,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" run: breeze static-checks --all-files --show-diff-on-failure --color always env: VERBOSE: "false" - SKIP: "identity" + SKIP: ${{ needs.build-info.outputs.skip-pre-commits }} COLUMNS: "250" - name: "Fix ownership" run: breeze fix-ownership @@ -650,33 +670,33 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" static-checks-basic-checks-only: timeout-minutes: 30 name: "Static checks: basic checks only" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info] env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} if: needs.build-info.outputs.basic-checks-only == 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - name: Cache pre-commit envs - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.cache/pre-commit key: "pre-commit-basic-${{steps.host-python-version.outputs.host-python-version}}-\ ${{ hashFiles('.pre-commit-config.yaml') }}" restore-keys: pre-commit-basic-${{steps.host-python-version.outputs.host-python-version}} - name: Fetch incoming commit ${{ github.sha }} with its parent - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: ref: ${{ github.sha }} fetch-depth: 2 @@ -695,7 +715,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" env: VERBOSE: "false" SKIP_IMAGE_PRE_COMMITS: "true" - SKIP: "identity" + SKIP: ${{ needs.build-info.outputs.skip-pre-commits }} COLUMNS: "250" - name: "Fix ownership" run: breeze fix-ownership @@ -704,23 +724,23 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" docs: timeout-minutes: 45 name: "Build docs" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-ci-images] if: needs.build-info.outputs.docs-build == 'true' env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.default-python-version }} steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: 
actions/checkout@v3 with: persist-credentials: false submodules: recursive - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: - python-version: ${{needs.build-info.outputs.defaultPythonVersion}} + python-version: ${{needs.build-info.outputs.default-python-version}} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -730,7 +750,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" run: breeze pull-image --tag-as-latest env: IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - - uses: actions/cache@v2 + - uses: actions/cache@v3 id: cache-doc-inventories with: path: ./docs/_inventory_cache/ @@ -739,7 +759,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" docs-inventory-${{ hashFiles('setup.py','setup.cfg','pyproject.toml;') }} docs-inventory- - name: "Build docs" - run: breeze build-docs + run: breeze build-docs ${{ needs.build-info.outputs.docs-filter }} - name: Configure AWS credentials uses: ./.github/actions/configure-aws-credentials if: > @@ -761,24 +781,24 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" prepare-test-provider-packages-wheel: timeout-minutes: 40 name: "Build and test provider packages wheel" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-ci-images] env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.default-python-version }} if: needs.build-info.outputs.image-build == 'true' && needs.build-info.outputs.default-branch == 'main' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false if: needs.build-info.outputs.default-branch == 'main' - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -801,7 +821,9 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" run: > breeze verify-provider-packages --use-airflow-version wheel --use-packages-from-dist --package-format wheel - - name: "Remove airflow package and replace providers with 2.1-compliant versions" + env: + SKIP_CONSTRAINTS: "${{ needs.build-info.outputs.upgrade-to-newer-dependencies }}" + - name: "Remove airflow package and replace providers with 2.2-compliant versions" run: | rm -vf dist/apache_airflow-*.whl \ dist/apache_airflow_providers_cncf_kubernetes*.whl \ @@ -809,12 +831,12 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" pip download --no-deps --dest dist \ apache-airflow-providers-cncf-kubernetes==3.0.0 \ apache-airflow-providers-celery==2.1.3 - - name: "Install and test provider packages and airflow on Airflow 2.1 files" + - name: "Install and test provider packages and airflow on Airflow 2.2 files" run: > - breeze verify-provider-packages --use-airflow-version 2.1.0 - --use-packages-from-dist --package-format wheel --airflow-constraints-reference constraints-2.1.0 + breeze verify-provider-packages --use-airflow-version 2.2.0 + --use-packages-from-dist 
--package-format wheel --airflow-constraints-reference constraints-2.2.0 env: - # The extras below are all extras that should be installed with Airflow 2.1.0 + # The extras below are all extras that should be installed with Airflow 2.2.0 AIRFLOW_EXTRAS: "airbyte,alibaba,amazon,apache.atlas.apache.beam,apache.cassandra,apache.drill,\ apache.druid,apache.hdfs,apache.hive,apache.kylin,apache.livy,apache.pig,apache.pinot,\ apache.spark,apache.sqoop,apache.webhdfs,arangodb,asana,async,\ @@ -832,26 +854,26 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" if: always() prepare-test-provider-packages-sdist: - timeout-minutes: 40 + timeout-minutes: 80 name: "Build and test provider packages sdist" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-ci-images] env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.default-python-version }} if: needs.build-info.outputs.image-build == 'true' && needs.build-info.outputs.default-branch == 'main' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false if: needs.build-info.outputs.default-branch == 'main' - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -878,6 +900,8 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" run: > breeze verify-provider-packages --use-airflow-version sdist --use-packages-from-dist --package-format sdist + env: + SKIP_CONSTRAINTS: "${{ needs.build-info.outputs.upgrade-to-newer-dependencies }}" - name: "Fix ownership" run: breeze fix-ownership if: always() @@ -885,15 +909,15 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" tests-helm: timeout-minutes: 80 name: "Python unit tests for helm chart" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-ci-images] env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} MOUNT_SELECTED_LOCAL_SOURCES: "true" TEST_TYPES: "Helm" BACKEND: "" DB_RESET: "false" - PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}} + PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.default-python-version}} if: > needs.build-info.outputs.needs-helm-tests == 'true' && (github.repository == 'apache/airflow' || github.event_name != 'schedule') && @@ -902,13 +926,13 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + 
python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -922,24 +946,25 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: "Tests: Helm" run: ./scripts/ci/testing/ci_run_airflow_testing.sh env: - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" + PR_LABELS: "${{ needs.build-info.outputs.pull-request-labels }}" + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Upload airflow logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() with: name: airflow-logs-helm path: "./files/airflow_logs*" retention-days: 7 - name: "Upload container logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() with: name: container-logs-helm path: "./files/container_logs*" retention-days: 7 - name: "Upload artifact for coverage" - uses: actions/upload-artifact@v2 - if: needs.build-info.outputs.runCoverage == 'true' + uses: actions/upload-artifact@v3 + if: needs.build-info.outputs.run-coverage == 'true' with: name: coverage-helm path: "./files/coverage*.xml" @@ -952,33 +977,33 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" timeout-minutes: 130 name: > Postgres${{matrix.postgres-version}},Py${{matrix.python-version}}: - ${{needs.build-info.outputs.testTypes}} - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + ${{needs.build-info.outputs.test-types}} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-ci-images] strategy: matrix: - python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} - postgres-version: ${{ fromJson(needs.build-info.outputs.postgresVersions) }} - exclude: ${{ fromJson(needs.build-info.outputs.postgresExclude) }} + python-version: ${{ fromJson(needs.build-info.outputs.python-versions) }} + postgres-version: ${{ fromJson(needs.build-info.outputs.postgres-versions) }} + exclude: ${{ fromJson(needs.build-info.outputs.postgres-exclude) }} fail-fast: false env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} BACKEND: postgres POSTGRES_VERSION: ${{ matrix.postgres-version }} - TEST_TYPES: "${{needs.build-info.outputs.testTypes}}" + TEST_TYPES: "${{needs.build-info.outputs.test-types}}" PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} if: needs.build-info.outputs.run-tests == 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -992,27 +1017,28 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" run: ./scripts/ci/testing/run_downgrade_test.sh - name: "Test Offline SQL generation" run: ./scripts/ci/testing/run_offline_sql_test.sh - - name: "Tests: ${{needs.build-info.outputs.testTypes}}" + - name: "Tests: ${{needs.build-info.outputs.test-types}}" run: ./scripts/ci/testing/ci_run_airflow_testing.sh env: - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" + PR_LABELS: "${{ 
needs.build-info.outputs.pull-request-labels }}" + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Upload airflow logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() with: name: airflow-logs-${{matrix.python-version}}-${{matrix.postgres-version}} path: "./files/airflow_logs*" retention-days: 7 - name: "Upload container logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() with: name: container-logs-postgres-${{matrix.python-version}}-${{matrix.postgres-version}} path: "./files/container_logs*" retention-days: 7 - name: "Upload artifact for coverage" - uses: actions/upload-artifact@v2 - if: needs.build-info.outputs.runCoverage == 'true' + uses: actions/upload-artifact@v3 + if: needs.build-info.outputs.run-coverage == 'true' with: name: coverage-postgres-${{matrix.python-version}}-${{matrix.postgres-version}} path: "./files/coverage*.xml" @@ -1024,33 +1050,33 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" tests-mysql: timeout-minutes: 130 name: > - MySQL${{matrix.mysql-version}}, Py${{matrix.python-version}}: ${{needs.build-info.outputs.testTypes}} - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + MySQL${{matrix.mysql-version}}, Py${{matrix.python-version}}: ${{needs.build-info.outputs.test-types}} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-ci-images] strategy: matrix: - python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} - mysql-version: ${{ fromJson(needs.build-info.outputs.mysqlVersions) }} - exclude: ${{ fromJson(needs.build-info.outputs.mysqlExclude) }} + python-version: ${{ fromJson(needs.build-info.outputs.python-versions) }} + mysql-version: ${{ fromJson(needs.build-info.outputs.mysql-versions) }} + exclude: ${{ fromJson(needs.build-info.outputs.mysql-exclude) }} fail-fast: false env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} BACKEND: mysql MYSQL_VERSION: ${{ matrix.mysql-version }} - TEST_TYPES: "${{needs.build-info.outputs.testTypes}}" + TEST_TYPES: "${{needs.build-info.outputs.test-types}}" PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} if: needs.build-info.outputs.run-tests == 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -1064,27 +1090,28 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" run: ./scripts/ci/testing/run_downgrade_test.sh - name: "Test Offline SQL generation" run: ./scripts/ci/testing/run_offline_sql_test.sh - - name: "Tests: ${{needs.build-info.outputs.testTypes}}" + - name: "Tests: ${{needs.build-info.outputs.test-types}}" run: ./scripts/ci/testing/ci_run_airflow_testing.sh env: - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" + PR_LABELS: "${{ needs.build-info.outputs.pull-request-labels }}" + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Upload airflow logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() 
with: name: airflow-logs-${{matrix.python-version}}-${{matrix.mysql-version}} path: "./files/airflow_logs*" retention-days: 7 - name: "Upload container logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() with: name: container-logs-mysql-${{matrix.python-version}}-${{matrix.mysql-version}} path: "./files/container_logs*" retention-days: 7 - name: "Upload artifact for coverage" - uses: actions/upload-artifact@v2 - if: needs.build-info.outputs.runCoverage == 'true' + uses: actions/upload-artifact@v3 + if: needs.build-info.outputs.run-coverage == 'true' with: name: coverage-mysql-${{matrix.python-version}}-${{matrix.mysql-version}} path: "./files/coverage*.xml" @@ -1096,33 +1123,33 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" tests-mssql: timeout-minutes: 130 name: > - MSSQL${{matrix.mssql-version}}, Py${{matrix.python-version}}: ${{needs.build-info.outputs.testTypes}} - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + MSSQL${{matrix.mssql-version}}, Py${{matrix.python-version}}: ${{needs.build-info.outputs.test-types}} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-ci-images] strategy: matrix: - python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} - mssql-version: ${{ fromJson(needs.build-info.outputs.mssqlVersions) }} - exclude: ${{ fromJson(needs.build-info.outputs.mssqlExclude) }} + python-version: ${{ fromJson(needs.build-info.outputs.python-versions) }} + mssql-version: ${{ fromJson(needs.build-info.outputs.mssql-versions) }} + exclude: ${{ fromJson(needs.build-info.outputs.mssql-exclude) }} fail-fast: false env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} BACKEND: mssql MSSQL_VERSION: ${{ matrix.mssql-version }} - TEST_TYPES: "${{needs.build-info.outputs.testTypes}}" + TEST_TYPES: "${{needs.build-info.outputs.test-types}}" PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} if: needs.build-info.outputs.run-tests == 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -1134,27 +1161,28 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Test downgrade" run: ./scripts/ci/testing/run_downgrade_test.sh - - name: "Tests: ${{needs.build-info.outputs.testTypes}}" + - name: "Tests: ${{needs.build-info.outputs.test-types}}" run: ./scripts/ci/testing/ci_run_airflow_testing.sh env: - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" + PR_LABELS: "${{ needs.build-info.outputs.pull-request-labels }}" + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Upload airflow logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() with: name: airflow-logs-${{matrix.python-version}}-${{matrix.mssql-version}} path: "./files/airflow_logs*" retention-days: 7 - name: "Upload container logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() 
with: name: container-logs-mssql-${{matrix.python-version}}-${{matrix.mssql-version}} path: "./files/container_logs*" retention-days: 7 - name: "Upload artifact for coverage" - uses: actions/upload-artifact@v2 - if: needs.build-info.outputs.runCoverage == 'true' + uses: actions/upload-artifact@v3 + if: needs.build-info.outputs.run-coverage == 'true' with: name: coverage-mssql-${{matrix.python-version}}-${{matrix.mssql-version}} path: "./files/coverage*.xml" @@ -1166,31 +1194,31 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" tests-sqlite: timeout-minutes: 130 name: > - Sqlite Py${{matrix.python-version}}: ${{needs.build-info.outputs.testTypes}} - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + Sqlite Py${{matrix.python-version}}: ${{needs.build-info.outputs.test-types}} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-ci-images] strategy: matrix: - python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} - exclude: ${{ fromJson(needs.build-info.outputs.sqliteExclude) }} + python-version: ${{ fromJson(needs.build-info.outputs.python-versions) }} + exclude: ${{ fromJson(needs.build-info.outputs.sqlite-exclude) }} fail-fast: false env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} BACKEND: sqlite - TEST_TYPES: "${{needs.build-info.outputs.testTypes}}" + TEST_TYPES: "${{needs.build-info.outputs.test-types}}" PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} if: needs.build-info.outputs.run-tests == 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -1202,27 +1230,28 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Test downgrade" run: ./scripts/ci/testing/run_downgrade_test.sh - - name: "Tests: ${{needs.build-info.outputs.testTypes}}" + - name: "Tests: ${{needs.build-info.outputs.test-types}}" run: ./scripts/ci/testing/ci_run_airflow_testing.sh env: - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" + PR_LABELS: "${{ needs.build-info.outputs.pull-request-labels }}" + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Upload airflow logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() with: name: airflow-logs-${{matrix.python-version}} path: './files/airflow_logs*' retention-days: 7 - name: "Upload container logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() with: name: container-logs-sqlite-${{matrix.python-version}} path: "./files/container_logs*" retention-days: 7 - name: "Upload artifact for coverage" - uses: actions/upload-artifact@v2 - if: needs.build-info.outputs.runCoverage == 'true' + uses: actions/upload-artifact@v3 + if: needs.build-info.outputs.run-coverage == 'true' with: name: coverage-sqlite-${{matrix.python-version}} path: ./files/coverage*.xml @@ -1234,27 +1263,27 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" tests-quarantined: 
timeout-minutes: 60 name: "Quarantined tests" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} continue-on-error: true needs: [build-info, wait-for-ci-images] env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - MYSQL_VERSION: ${{needs.build-info.outputs.defaultMySQLVersion}} - POSTGRES_VERSION: ${{needs.build-info.outputs.defaultPostgresVersion}} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} + MYSQL_VERSION: ${{needs.build-info.outputs.default-mysql-version}} + POSTGRES_VERSION: ${{needs.build-info.outputs.default-postgres-version}} TEST_TYPES: "Quarantined" - PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.default-python-version }} if: needs.build-info.outputs.run-tests == 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - name: "Set issue id for main" @@ -1279,31 +1308,31 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: "Tests: Quarantined" run: ./scripts/ci/testing/ci_run_quarantined_tests.sh env: - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" + PR_LABELS: "${{ needs.build-info.outputs.pull-request-labels }}" - name: "Upload Quarantine test results" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: quarantined-tests path: "files/test_result-*.xml" retention-days: 7 - name: "Upload airflow logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() with: name: airflow-logs-quarantined-${{ matrix.backend }} path: "./files/airflow_logs*" retention-days: 7 - name: "Upload container logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() with: name: container-logs-quarantined-${{ matrix.backend }} path: "./files/container_logs*" retention-days: 7 - name: "Upload artifact for coverage" - uses: actions/upload-artifact@v2 - if: needs.build-info.outputs.runCoverage == 'true' + uses: actions/upload-artifact@v3 + if: needs.build-info.outputs.run-coverage == 'true' with: name: coverage-quarantined-${{ matrix.backend }} path: "./files/coverage*.xml" @@ -1315,7 +1344,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" upload-coverage: timeout-minutes: 15 name: "Upload coverage" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} continue-on-error: true needs: - build-info @@ -1325,19 +1354,19 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - tests-mssql - tests-quarantined env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} # Only upload coverage on merges to main - if: needs.build-info.outputs.runCoverage == 'true' + if: needs.build-info.outputs.run-coverage == 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ 
github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false submodules: recursive - name: "Download all artifacts from the current build" - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ./coverage-files - name: "Removes unnecessary artifacts" @@ -1350,42 +1379,42 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" wait-for-prod-images: timeout-minutes: 120 name: "Wait for PROD images" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-ci-images, build-prod-images] if: needs.build-info.outputs.image-build == 'true' env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} BACKEND: sqlite - PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.default-python-version }} steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh - name: "Free space" run: breeze free-space - name: "Cache virtualenv environment" - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: '.build/.docker_venv' key: ${{ runner.os }}-docker-venv-${{ hashFiles('scripts/ci/images/ci_run_docker_tests.py') }} - name: Wait for PROD images ${{ env.PYTHON_VERSIONS }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} # We wait for the images to be available either from "build-images.yml' run as pull_request_target - # or from build-prod-image above. + # or from build-prod-images above. # We are utilising single job to wait for all images because this job merely waits # For the images to be available and test them. 
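Editor's note on the comment above: both `wait-for-ci-images` and `wait-for-prod-images` follow the same "wait, then verify" pattern via `breeze pull-image/pull-prod-image --verify-image --wait-for-image`; they build nothing themselves and only poll until the images tagged for this build can be pulled, then smoke-test them. The sketch below is a minimal Python illustration of that pattern only. The image tag, timeout values and the `airflow version` smoke test are illustrative assumptions, not the actual breeze implementation.

```python
import subprocess
import time


def wait_for_image(image: str, timeout_s: int = 120 * 60, poll_s: int = 60) -> None:
    """Poll the registry until `image` can be pulled, or give up after the deadline."""
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        result = subprocess.run(["docker", "pull", image], capture_output=True)
        if result.returncode == 0:
            return
        time.sleep(poll_s)
    raise TimeoutError(f"Image {image} did not become available in time")


def verify_image(image: str) -> None:
    """Tiny smoke test: the image must start and report its Airflow version."""
    subprocess.run(["docker", "run", "--rm", image, "airflow", "version"], check=True)


if __name__ == "__main__":
    # Hypothetical tag layout; the real workflow derives it from IMAGE_TAG_FOR_THE_BUILD.
    tag = "ghcr.io/apache/airflow/main/prod/python3.7:example-tag"
    wait_for_image(tag)
    verify_image(tag)
```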
run: breeze pull-prod-image --verify-image --wait-for-image --run-in-parallel env: - PYTHON_VERSIONS: ${{ needs.build-info.outputs.pythonVersionsListAsString }} + PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Fix ownership" run: breeze fix-ownership @@ -1394,23 +1423,23 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" test-docker-compose-quick-start: timeout-minutes: 60 name: "Test docker-compose quick start" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-prod-images] if: needs.build-info.outputs.image-build == 'true' env: - PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.default-python-version }} steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 2 persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -1427,27 +1456,27 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" if: always() tests-kubernetes: - timeout-minutes: 70 + timeout-minutes: 240 name: Helm Chart; ${{matrix.executor}} - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-prod-images] strategy: matrix: executor: [KubernetesExecutor, CeleryExecutor, LocalExecutor] fail-fast: false env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} BACKEND: postgres RUN_TESTS: "true" RUNTIME: "kubernetes" KUBERNETES_MODE: "image" EXECUTOR: ${{matrix.executor}} - KIND_VERSION: "${{ needs.build-info.outputs.defaultKindVersion }}" - HELM_VERSION: "${{ needs.build-info.outputs.defaultHelmVersion }}" + KIND_VERSION: "${{ needs.build-info.outputs.default-kind-version }}" + HELM_VERSION: "${{ needs.build-info.outputs.default-helm-version }}" CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING: > - ${{needs.build-info.outputs.pythonVersionsListAsString}} + ${{needs.build-info.outputs.python-versions-list-as-string}} CURRENT_KUBERNETES_VERSIONS_AS_STRING: > - ${{needs.build-info.outputs.kubernetesVersionsListAsString}} + ${{needs.build-info.outputs.kubernetes-versions-list-as-string}} if: > ( needs.build-info.outputs.run-kubernetes-tests == 'true' || needs.build-info.outputs.needs-helm-tests == 'true' ) && @@ -1456,13 +1485,13 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - 
run: ./scripts/ci/install_breeze.sh @@ -1471,22 +1500,22 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: Pull PROD images ${{ env.PYTHON_VERSIONS }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} run: breeze pull-prod-image --run-in-parallel --tag-as-latest env: - PYTHON_VERSIONS: ${{ needs.build-info.outputs.pythonVersionsListAsString }} + PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Cache bin folder with tools for kubernetes testing" - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ".build/kubernetes-bin" key: "kubernetes-binaries --${{ needs.build-info.outputs.defaultKindVersion }}\ --${{ needs.build-info.outputs.defaultHelmVersion }}" +-${{ needs.build-info.outputs.default-kind-version }}\ +-${{ needs.build-info.outputs.default-helm-version }}" restore-keys: "kubernetes-binaries" - name: "Kubernetes Tests" run: ./scripts/ci/kubernetes/ci_setup_clusters_and_run_kubernetes_tests_in_parallel.sh env: - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" + PR_LABELS: "${{ needs.build-info.outputs.pull-request-labels }}" - name: "Upload KinD logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() || cancelled() with: name: kind-logs-${{matrix.executor}} @@ -1499,21 +1528,21 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" tests-helm-executor-upgrade: timeout-minutes: 150 name: Helm Chart Executor Upgrade - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, wait-for-prod-images] env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} BACKEND: postgres RUN_TESTS: "true" RUNTIME: "kubernetes" KUBERNETES_MODE: "image" EXECUTOR: "KubernetesExecutor" - KIND_VERSION: "${{ needs.build-info.outputs.defaultKindVersion }}" - HELM_VERSION: "${{ needs.build-info.outputs.defaultHelmVersion }}" + KIND_VERSION: "${{ needs.build-info.outputs.default-kind-version }}" + HELM_VERSION: "${{ needs.build-info.outputs.default-helm-version }}" CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING: > - ${{needs.build-info.outputs.pythonVersionsListAsString}} + ${{needs.build-info.outputs.python-versions-list-as-string}} CURRENT_KUBERNETES_VERSIONS_AS_STRING: > - ${{needs.build-info.outputs.kubernetesVersionsListAsString}} + ${{needs.build-info.outputs.kubernetes-versions-list-as-string}} if: > needs.build-info.outputs.run-kubernetes-tests == 'true' && needs.build-info.outputs.default-branch == 'main' @@ -1521,13 +1550,13 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -1536,33 +1565,33 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: Pull PROD images ${{ env.PYTHON_VERSIONS }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} run: breeze pull-prod-image --run-in-parallel --tag-as-latest env: - PYTHON_VERSIONS: ${{ 
needs.build-info.outputs.pythonVersionsListAsString }} + PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Cache virtualenv for kubernetes testing" - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ".build/.kubernetes_venv" - key: "kubernetes-${{ needs.build-info.outputs.defaultPythonVersion }}\ - -${{needs.build-info.outputs.kubernetesVersionsListAsString}} - -${{needs.build-info.outputs.pythonVersionsListAsString}} + key: "kubernetes-${{ needs.build-info.outputs.default-python-version }}\ + -${{needs.build-info.outputs.kubernetes-versions-list-as-string}} + -${{needs.build-info.outputs.python-versions-list-as-string}} -${{ hashFiles('setup.py','setup.cfg') }}" - restore-keys: "kubernetes-${{ needs.build-info.outputs.defaultPythonVersion }}-\ - -${{needs.build-info.outputs.kubernetesVersionsListAsString}} - -${{needs.build-info.outputs.pythonVersionsListAsString}}" + restore-keys: "kubernetes-${{ needs.build-info.outputs.default-python-version }}-\ + -${{needs.build-info.outputs.kubernetes-versions-list-as-string}} + -${{needs.build-info.outputs.python-versions-list-as-string}}" - name: "Cache bin folder with tools for kubernetes testing" - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ".build/kubernetes-bin" key: "kubernetes-binaries - -${{ needs.build-info.outputs.defaultKindVersion }}\ - -${{ needs.build-info.outputs.defaultHelmVersion }}" + -${{ needs.build-info.outputs.default-kind-version }}\ + -${{ needs.build-info.outputs.default-helm-version }}" restore-keys: "kubernetes-binaries" - name: "Kubernetes Helm Chart Executor Upgrade Tests" run: ./scripts/ci/kubernetes/ci_upgrade_cluster_with_different_executors_in_parallel.sh env: - PR_LABELS: "${{ needs.build-info.outputs.pullRequestLabels }}" + PR_LABELS: "${{ needs.build-info.outputs.pull-request-labels }}" - name: "Upload KinD logs" - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: failure() || cancelled() with: name: kind-logs-KubernetesExecutor @@ -1577,7 +1606,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" contents: write timeout-minutes: 40 name: "Constraints" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: - build-info - wait-for-ci-images @@ -1588,20 +1617,20 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - tests-mssql - tests-postgres env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - if: needs.build-info.outputs.upgradeToNewerDependencies != 'false' + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} + if: needs.build-info.outputs.upgrade-to-newer-dependencies != 'false' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false submodules: recursive - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -1610,7 +1639,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: Pull CI images ${{ env.PYTHON_VERSIONS }}:${{ env.IMAGE_TAG_FOR_THE_BUILD }} run: breeze pull-image --run-in-parallel --tag-as-latest 
env: - PYTHON_VERSIONS: ${{ needs.build-info.outputs.pythonVersionsListAsString }} + PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Generate constraints" run: | @@ -1619,24 +1648,24 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" breeze generate-constraints --run-in-parallel --airflow-constraints-mode constraints-no-providers breeze generate-constraints --run-in-parallel --airflow-constraints-mode constraints env: - PYTHON_VERSIONS: ${{ needs.build-info.outputs.pythonVersionsListAsString }} + PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }} - name: "Set constraints branch name" id: constraints-branch run: ./scripts/ci/constraints/ci_branch_constraints.sh - if: needs.build-info.outputs.mergeRun == 'true' + if: needs.build-info.outputs.merge-run == 'true' - name: Checkout ${{ steps.constraints-branch.outputs.branch }} - uses: actions/checkout@v2 - if: needs.build-info.outputs.mergeRun == 'true' + uses: actions/checkout@v3 + if: needs.build-info.outputs.merge-run == 'true' with: path: "repo" ref: ${{ steps.constraints-branch.outputs.branch }} persist-credentials: false - - name: "Commit changed constraint files for ${{needs.build-info.outputs.pythonVersions}}" + - name: "Commit changed constraint files for ${{needs.build-info.outputs.python-versions}}" run: ./scripts/ci/constraints/ci_commit_constraints.sh - if: needs.build-info.outputs.mergeRun == 'true' + if: needs.build-info.outputs.merge-run == 'true' - name: "Push changes" uses: ./.github/actions/github-push-action - if: needs.build-info.outputs.mergeRun == 'true' + if: needs.build-info.outputs.merge-run == 'true' with: github_token: ${{ secrets.GITHUB_TOKEN }} branch: ${{ steps.constraints-branch.outputs.branch }} @@ -1654,31 +1683,31 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" packages: write timeout-minutes: 120 name: "Push Image Cache" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: - build-info - constraints - docs - if: needs.build-info.outputs.mergeRun == 'true' + if: needs.build-info.outputs.merge-run == 'true' strategy: fail-fast: false matrix: - python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} + python-version: ${{ fromJson(needs.build-info.outputs.python-versions) }} platform: ["linux/amd64", "linux/arm64"] env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: "Setup python" - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - run: ./scripts/ci/install_breeze.sh @@ -1690,7 +1719,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" run: breeze pull-image --tag-as-latest env: # Always use default Python version of CI image for preparing packages - PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + PYTHON_MAJOR_MINOR_VERSION: ${{ 
needs.build-info.outputs.default-python-version }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} - name: "Cleanup dist and context file" run: rm -fv ./dist/* ./docker-context-files/* @@ -1711,6 +1740,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: "Push CI cache ${{ matrix.python-version }} ${{ matrix.platform }}" run: > breeze build-image + --builder airflow_cache --prepare-buildx-cache --force-build --platform ${{ matrix.platform }} @@ -1721,6 +1751,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: "Push PROD cache ${{ matrix.python-version }} ${{ matrix.platform }}" run: > breeze build-prod-image + --builder airflow_cache --airflow-is-in-context --install-packages-from-context --prepare-buildx-cache @@ -1734,3 +1765,66 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: "Fix ownership" run: breeze fix-ownership if: always() + + build-ci-arm-images: + timeout-minutes: 120 + name: > + ${{needs.build-info.outputs.build-job-description}} CI ARM images + ${{ needs.build-info.outputs.all-python-versions-list-as-string }} + runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} + needs: + - build-info + - wait-for-ci-images + - wait-for-prod-images + - static-checks + - tests-sqlite + - tests-mysql + - tests-mssql + - tests-postgres + env: + DEFAULT_BRANCH: ${{ needs.build-info.outputs.default-branch }} + DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }} + DEBIAN_VERSION: ${{ needs.build-info.outputs.debian-version }} + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on)[0] }} + if: needs.build-info.outputs.upgrade-to-newer-dependencies != 'false' + steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + if: needs.build-info.outputs.in-workflow-build == 'true' + - uses: actions/checkout@v2 + with: + ref: ${{ needs.build-info.outputs.targetCommitSha }} + persist-credentials: false + submodules: recursive + if: needs.build-info.outputs.in-workflow-build == 'true' + - name: "Setup python" + uses: actions/setup-python@v2 + with: + python-version: ${{ needs.build-info.outputs.default-python-version }} + if: needs.build-info.outputs.in-workflow-build == 'true' + - run: ./scripts/ci/install_breeze.sh + if: needs.build-info.outputs.in-workflow-build == 'true' + - name: "Free space" + run: breeze free-space + if: needs.build-info.outputs.in-workflow-build == 'true' + - name: "Start ARM instance" + run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh + if: needs.build-info.outputs.in-workflow-build == 'true' + - name: > + Build CI ARM images ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + ${{ needs.build-info.outputs.all-python-versions-list-as-string }} + run: > + breeze build-image --run-in-parallel --parallelism 1 + --builder airflow_cache --platform "linux/arm64" + env: + UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} + DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} + IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} + PYTHON_VERSIONS: ${{ needs.build-info.outputs.all-python-versions-list-as-string }} + if: needs.build-info.outputs.in-workflow-build == 'true' + - name: "Stop ARM instance" + run: ./scripts/ci/images/ci_stop_arm_instance.sh + if: always() && needs.build-info.outputs.in-workflow-build == 'true' + - name: "Fix ownership" + run: breeze fix-ownership + if: always() && needs.build-info.outputs.in-workflow-build == 'true' diff --git a/.github/workflows/codeql-analysis.yml 
b/.github/workflows/codeql-analysis.yml index 6d6f4d02562d5..4e6c7c83f4dc4 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -43,19 +43,19 @@ jobs: with: fetch-depth: 2 persist-credentials: false + - name: "Setup python" + uses: actions/setup-python@v2 + with: + # We do not have output from selective checks yet, so we need to hardcode python + python-version: 3.7 + cache: 'pip' + cache-dependency-path: ./dev/breeze/setup* + - run: ./scripts/ci/install_breeze.sh - name: Selective checks id: selective-checks env: - EVENT_NAME: ${{ github.event_name }} - TARGET_COMMIT_SHA: ${{ github.sha }} - run: | - if [[ ${EVENT_NAME} == "pull_request" ]]; then - # Run selective checks - ./scripts/ci/selective_ci_checks.sh "${TARGET_COMMIT_SHA}" - else - # Run all checks - ./scripts/ci/selective_ci_checks.sh - fi + COMMIT_REF: "${{ github.sha }}" + run: breeze selective-check analyze: name: Analyze diff --git a/.github/workflows/label_when_reviewed_workflow_run.yml b/.github/workflows/label_when_reviewed_workflow_run.yml deleted file mode 100644 index 9b11d71ad2498..0000000000000 --- a/.github/workflows/label_when_reviewed_workflow_run.yml +++ /dev/null @@ -1,177 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
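Editor's note on the codeql-analysis.yml hunk above: the shell dispatcher around `./scripts/ci/selective_ci_checks.sh` is replaced by a single `breeze selective-check` call, with the commit passed as `COMMIT_REF`. For reference, the removed bash step boiled down to the small decision sketched below in Python; the function name is illustrative only, and the equivalent behaviour now lives inside breeze rather than in this workflow.

```python
import subprocess


def run_selective_checks(event_name: str, target_commit_sha: str) -> None:
    """Equivalent of the removed bash step: only pull_request events pass a commit SHA."""
    cmd = ["./scripts/ci/selective_ci_checks.sh"]
    if event_name == "pull_request":
        # Run selective checks against the incoming commit.
        cmd.append(target_commit_sha)
    # Otherwise run all checks (no argument).
    subprocess.run(cmd, check=True)
```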
-# ---- -name: Label when reviewed workflow run -on: # yamllint disable-line rule:truthy - workflow_run: - workflows: ["Label when reviewed"] - types: ['requested'] -permissions: - # All other permissions are set to none - checks: write - contents: read - pull-requests: write -jobs: - - label-when-reviewed: - name: "Label PRs when reviewed workflow run" - runs-on: ubuntu-20.04 - outputs: - labelSet: ${{ steps.label-when-reviewed.outputs.labelSet }} - steps: - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 - with: - persist-credentials: false - submodules: recursive - - name: "Get information about the original trigger of the run" - uses: ./.github/actions/get-workflow-origin - id: source-run-info - with: - token: ${{ secrets.GITHUB_TOKEN }} - sourceRunId: ${{ github.event.workflow_run.id }} - - name: Initiate Selective Build check - uses: ./.github/actions/checks-action - id: selective-build-check - with: - token: ${{ secrets.GITHUB_TOKEN }} - name: "Selective build check" - status: "in_progress" - sha: ${{ steps.source-run-info.outputs.sourceHeadSha }} - details_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} - output: > - {"summary": - "Checking selective status of the build in - [the run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) - "} - - name: > - Event: ${{ steps.source-run-info.outputs.sourceEvent }} - Repo: ${{ steps.source-run-info.outputs.sourceHeadRepo }} - Branch: ${{ steps.source-run-info.outputs.sourceHeadBranch }} - Run id: ${{ github.run_id }} - Source Run id: ${{ github.event.workflow_run.id }} - Sha: ${{ github.sha }} - Source Sha: ${{ steps.source-run-info.outputs.sourceHeadSha }} - Merge commit Sha: ${{ steps.source-run-info.outputs.mergeCommitSha }} - Target commit Sha: ${{ steps.source-run-info.outputs.targetCommitSha }} - run: printenv - - name: > - Fetch incoming commit ${{ steps.source-run-info.outputs.targetCommitSha }} with its parent - uses: actions/checkout@v2 - with: - ref: ${{ steps.source-run-info.outputs.targetCommitSha }} - fetch-depth: 2 - persist-credentials: false - # checkout the main branch again, to use the right script in main workflow - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 - with: - persist-credentials: false - submodules: recursive - - name: Selective checks - id: selective-checks - env: - EVENT_NAME: ${{ steps.source-run-info.outputs.sourceEvent }} - TARGET_COMMIT_SHA: ${{ steps.source-run-info.outputs.targetCommitSha }} - PR_LABELS: ${{ steps.source-run-info.outputs.pullRequestLabels }} - run: | - if [[ ${EVENT_NAME} == "pull_request_review" ]]; then - # Run selective checks - ./scripts/ci/selective_ci_checks.sh "${TARGET_COMMIT_SHA}" - else - # Run all checks - ./scripts/ci/selective_ci_checks.sh - fi - - name: "Label when approved by committers for PRs that require full tests" - uses: ./.github/actions/label-when-approved-action - id: label-full-test-prs-when-approved-by-commiters - if: > - steps.selective-checks.outputs.run-tests == 'true' && - contains(steps.selective-checks.outputs.test-types, 'Core') - with: - token: ${{ secrets.GITHUB_TOKEN }} - label: 'full tests needed' - require_committers_approval: 'true' - remove_label_when_approval_missing: 'false' - pullRequestNumber: ${{ steps.source-run-info.outputs.pullRequestNumber }} - comment: > - The PR most likely needs to run full matrix of tests because it modifies parts of the core - of Airflow. 
However, committers might decide to merge it quickly and take the risk. - If they don't merge it quickly - please rebase it to the latest main at your convenience, - or amend the last commit of the PR, and push it with --force-with-lease. - - name: "Initiate GitHub Check forcing rerun of SH ${{ github.event.pull_request.head.sha }}" - uses: ./.github/actions/checks-action - id: full-test-check - if: steps.label-full-test-prs-when-approved-by-commiters.outputs.labelSet == 'true' - with: - token: ${{ secrets.GITHUB_TOKEN }} - name: "Please rebase or amend, and force push the PR to run full tests" - status: "in_progress" - sha: ${{ steps.source-run-info.outputs.sourceHeadSha }} - details_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} - output: > - {"summary": - "The PR likely needs to run all tests! This was determined via selective check in - [the run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) - "} - - name: "Label when approved by committers for PRs that do not require full tests" - uses: ./.github/actions/label-when-approved-action - id: label-simple-test-prs-when-approved-by-commiters - if: > - steps.selective-checks.outputs.run-tests == 'true' && - ! contains(steps.selective-checks.outputs.test-types, 'Core') - with: - token: ${{ secrets.GITHUB_TOKEN }} - label: 'okay to merge' - require_committers_approval: 'true' - pullRequestNumber: ${{ steps.source-run-info.outputs.pullRequestNumber }} - comment: > - The PR is likely OK to be merged with just subset of tests for default Python and Database - versions without running the full matrix of tests, because it does not modify the core of - Airflow. If the committers decide that the full tests matrix is needed, they will add the label - 'full tests needed'. Then you should rebase to the latest main or amend the last commit - of the PR, and push it with --force-with-lease. - - name: "Label when approved by committers for PRs that do not require tests at all" - uses: ./.github/actions/label-when-approved-action - id: label-no-test-prs-when-approved-by-commiters - if: steps.selective-checks.outputs.run-tests != 'true' - with: - token: ${{ secrets.GITHUB_TOKEN }} - label: 'okay to merge' - pullRequestNumber: ${{ steps.source-run-info.outputs.pullRequestNumber }} - require_committers_approval: 'true' - comment: > - The PR is likely ready to be merged. No tests are needed as no important environment files, - nor python files were modified by it. However, committers might decide that full test matrix is - needed and add the 'full tests needed' label. Then you should rebase it to the latest main - or amend the last commit of the PR, and push it with --force-with-lease. 
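Editor's note on the deleted steps above: the removed workflow encoded a small decision tree for approved PRs, labelling them "full tests needed" when the selective checks selected the Core test type and "okay to merge" otherwise (whether a subset of tests or no tests at all were needed). The condensed sketch below mirrors the `if:` expressions of those removed steps; it is written as plain Python purely for readability and is not part of any remaining workflow.

```python
def label_for_approved_pr(run_tests: bool, test_types: str) -> str:
    """Mirror of the `if:` conditions in the removed label-when-reviewed steps."""
    if run_tests and "Core" in test_types:
        # Core changes require the full matrix, so reviewers are warned via the label.
        return "full tests needed"
    # Either only a subset of tests is needed, or no tests at all.
    return "okay to merge"


assert label_for_approved_pr(True, "Core Providers") == "full tests needed"
assert label_for_approved_pr(True, "Providers") == "okay to merge"
assert label_for_approved_pr(False, "") == "okay to merge"
```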
- - name: Update Selective Build check - uses: ./.github/actions/checks-action - if: always() - with: - token: ${{ secrets.GITHUB_TOKEN }} - check_id: ${{ steps.selective-build-check.outputs.check_id }} - status: "completed" - sha: ${{ steps.source-run-info.outputs.sourceHeadSha }} - conclusion: ${{ job.status }} - details_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} - output: > - {"summary": - "Checking selective status of the build completed in - [the run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) - "} diff --git a/.github/workflows/release_dockerhub_image.yml b/.github/workflows/release_dockerhub_image.yml new file mode 100644 index 0000000000000..3259759cac7d4 --- /dev/null +++ b/.github/workflows/release_dockerhub_image.yml @@ -0,0 +1,153 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +--- +name: "Release PROD images" +on: # yamllint disable-line rule:truthy + workflow_dispatch: + inputs: + airflowVersion: + description: 'Airflow version' + required: true + skipLatest: + description: 'Skip Latest: Set to true if not latest.' 
+ default: '' + required: false +concurrency: + group: ${{ github.event.inputs.airflowVersion }} + cancel-in-progress: true +jobs: + build-info: + timeout-minutes: 10 + name: "Build Info" + runs-on: ${{ github.repository == 'apache/airflow' && 'self-hosted' || 'ubuntu-20.04' }} + outputs: + pythonVersions: ${{ steps.selective-checks.outputs.python-versions }} + allPythonVersions: ${{ steps.selective-checks.outputs.all-python-versions }} + defaultPythonVersion: ${{ steps.selective-checks.outputs.default-python-version }} + skipLatest: ${{ github.event.inputs.skipLatest == '' && ' ' || '--skip-latest' }} + limitPlatform: ${{ github.repository == 'apache/airflow' && ' ' || '--limit-platform linux/amd64' }} + env: + GITHUB_CONTEXT: ${{ toJson(github) }} + steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v2 + with: + persist-credentials: false + submodules: recursive + - name: "Setup python" + uses: actions/setup-python@v2 + with: + # We do not have output from selective checks yet, so we need to hardcode python + python-version: 3.7 + cache: 'pip' + cache-dependency-path: ./dev/breeze/setup* + - run: ./scripts/ci/install_breeze.sh + - name: Selective checks + id: selective-checks + run: breeze selective-check + release-images: + timeout-minutes: 120 + name: "Release images: ${{ github.event.inputs.airflowVersion }}, ${{ matrix.python-version }}" + runs-on: ${{ github.repository == 'apache/airflow' && 'self-hosted' || 'ubuntu-20.04' }} + needs: [build-info] + strategy: + fail-fast: false + matrix: + python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} + env: + RUNS_ON: ${{ github.repository == 'apache/airflow' && 'self-hosted' || 'ubuntu-20.04' }} + if: contains(fromJSON('[ + "ashb", + "ephraimbuddy", + "jedcunningham", + "kaxil", + "potiuk", + ]'), github.event.sender.login) + steps: + - name: Cleanup repo + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v2 + with: + persist-credentials: false + - name: "Setup python" + uses: actions/setup-python@v2 + with: + python-version: ${{ needs.build-info.outputs.defaultPythonVersion }} + cache: 'pip' + cache-dependency-path: ./dev/breeze/setup* + - run: ./scripts/ci/install_breeze.sh + - name: "Free space" + run: breeze free-space + - name: Build CI image for PROD build ${{ needs.build-info.outputs.defaultPythonVersion }} + run: breeze build-image + env: + PYTHON_MAJOR_MINOR_VERSION: ${{ needs.build-info.outputs.defaultPythonVersion }} + - name: "Cleanup dist and context file" + run: rm -fv ./dist/* ./docker-context-files/* + - name: "Start ARM instance" + run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh + if: github.repository == 'apache/airflow' + - name: "Login to docker" + run: > + echo ${{ secrets.DOCKERHUB_TOKEN }} | + docker login --password-stdin --username ${{ secrets.DOCKERHUB_USER }} + - name: > + Release regular images: ${{ github.event.inputs.airflowVersion }}, ${{ matrix.python-version }} + run: > + breeze release-prod-images + --dockerhub-repo ${{ github.repository }} + --airflow-version ${{ github.event.inputs.airflowVersion }} + ${{ needs.build-info.outputs.skipLatest }} + ${{ needs.build-info.outputs.limitPlatform }} + --limit-python ${{ matrix.python-version }} + - name: > + Release slim images: ${{ 
github.event.inputs.airflowVersion }}, ${{ matrix.python-version }} + run: > + breeze release-prod-images + --dockerhub-repo ${{ github.repository }} + --airflow-version ${{ github.event.inputs.airflowVersion }} + ${{ needs.build-info.outputs.skipLatest }} + ${{ needs.build-info.outputs.limitPlatform }} + --limit-python ${{ matrix.python-version }} --slim-images + - name: "Stop ARM instance" + run: ./scripts/ci/images/ci_stop_arm_instance.sh + if: always() && github.repository == 'apache/airflow' + - name: > + Verify regular AMD64 image: ${{ github.event.inputs.airflowVersion }}, ${{ matrix.python-version }} + run: > + breeze verify-prod-image + --pull-image + --image-name + ${{github.repository}}:${{github.event.inputs.airflowVersion}}-python${{matrix.python-version}} + - name: > + Verify slim AMD64 image: ${{ github.event.inputs.airflowVersion }}, ${{ matrix.python-version }} + run: > + breeze verify-prod-image + --pull-image + --slim-image + --image-name + ${{github.repository}}:slim-${{github.event.inputs.airflowVersion}}-python${{matrix.python-version}} + - name: "Docker logout" + run: docker logout + if: always() + - name: "Fix ownership" + run: breeze fix-ownership + if: always() diff --git a/.gitignore b/.gitignore index 9a00d53fa3bda..fe3d74aa2344a 100644 --- a/.gitignore +++ b/.gitignore @@ -225,3 +225,7 @@ licenses/LICENSES-ui.txt # Packaged breeze on Windows /breeze.exe + +# Generated out dir + +/out diff --git a/.gitmodules b/.gitmodules index e03978e263653..aa1358f88496d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,6 @@ [submodule ".github/actions/get-workflow-origin"] path = .github/actions/get-workflow-origin url = https://github.com/potiuk/get-workflow-origin -[submodule ".github/actions/checks-action"] - path = .github/actions/checks-action - url = https://github.com/LouisBrunner/checks-action [submodule ".github/actions/configure-aws-credentials"] path = .github/actions/configure-aws-credentials url = https://github.com/aws-actions/configure-aws-credentials @@ -13,6 +10,3 @@ [submodule ".github/actions/github-push-action"] path = .github/actions/github-push-action url = https://github.com/ad-m/github-push-action -[submodule ".github/actions/label-when-approved-action"] - path = .github/actions/label-when-approved-action - url = https://github.com/TobKed/label-when-approved-action diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8f4039f1e9050..77da7b91ec919 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,7 +28,7 @@ repos: - id: check-hooks-apply name: Check if all hooks apply to the repository - repo: https://github.com/thlorenz/doctoc.git - rev: v2.1.0 + rev: v2.2.0 hooks: - id: doctoc name: Add TOC for md and rst files @@ -39,7 +39,7 @@ repos: - "--maxlevel" - "2" - repo: https://github.com/Lucas-C/pre-commit-hooks - rev: v1.1.13 + rev: v1.2.0 hooks: - id: forbid-tabs name: Fail if tabs are used in the project @@ -159,6 +159,13 @@ repos: hooks: - id: blacken-docs name: Run black on python code blocks in documentation files + args: + - --line-length=110 + - --target-version=py37 + - --target-version=py38 + - --target-version=py39 + - --target-version=py310 + - --skip-string-normalization alias: black additional_dependencies: [black==22.3.0] - repo: https://github.com/pre-commit/pre-commit-hooks @@ -204,7 +211,7 @@ repos: pass_filenames: true # TODO: Bump to Python 3.8 when support for Python 3.7 is dropped in Airflow. 
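Stepping back to the new ``release_dockerhub_image.yml`` workflow added above: it is only triggered manually via ``workflow_dispatch`` with the ``airflowVersion`` and ``skipLatest`` inputs. A minimal sketch of how a release manager might trigger it from the GitHub CLI is shown below; the ``gh`` invocation and the version value are illustrative, only the workflow file name and input names come from the workflow definition itself.

```bash
# Hypothetical manual trigger of the "Release PROD images" workflow.
# Omit skipLatest (it defaults to '') to also move the "latest" tags.
gh workflow run release_dockerhub_image.yml \
  --repo apache/airflow \
  -f airflowVersion=2.3.3 \
  -f skipLatest=true
```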
- repo: https://github.com/asottile/pyupgrade - rev: v2.32.0 + rev: v2.32.1 hooks: - id: pyupgrade name: Upgrade Python code automatically @@ -324,7 +331,7 @@ repos: files: ^setup\.cfg$|^setup\.py$ pass_filenames: false entry: ./scripts/ci/pre_commit/pre_commit_check_order_setup.py - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4'] - id: check-extra-packages-references name: Checks setup extra packages description: Checks if all the libraries in setup.py are listed in extra-packages-ref.rst file @@ -332,29 +339,29 @@ repos: files: ^setup\.py$|^docs/apache-airflow/extra-packages-ref\.rst$ pass_filenames: false entry: ./scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4'] # This check might be removed when min-airflow-version in providers is 2.2 - - id: check-airflow-2-1-compatibility - name: Check that providers are 2.1 compatible. + - id: check-airflow-2-2-compatibility + name: Check that providers are 2.2 compatible. entry: ./scripts/ci/pre_commit/pre_commit_check_2_1_compatibility.py language: python pass_filenames: true files: ^airflow/providers/.*\.py$ - additional_dependencies: ['rich>=12.4.1'] - - id: update-breeze-file + additional_dependencies: ['rich>=12.4.4'] + - id: update-breeze-cmd-output name: Update output of breeze commands in BREEZE.rst entry: ./scripts/ci/pre_commit/pre_commit_breeze_cmd_line.py language: python files: ^BREEZE\.rst$|^dev/breeze/.*$ pass_filenames: false - additional_dependencies: ['rich>=12.4.1', 'rich-click'] + additional_dependencies: ['rich>=12.4.4', 'rich-click>=1.5'] - id: update-local-yml-file name: Update mounts in the local yml file entry: ./scripts/ci/pre_commit/pre_commit_local_yml_mounts.py language: python files: ^dev/breeze/src/airflow_breeze/utils/docker_command_utils\.py$|^scripts/ci/docker_compose/local\.yml$ pass_filenames: false - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4'] - id: update-setup-cfg-file name: Update setup.cfg file with all licenses entry: ./scripts/ci/pre_commit/pre_commit_setup_cfg_file.sh @@ -380,7 +387,7 @@ repos: language: python files: ^Dockerfile$ pass_filenames: false - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4'] - id: update-supported-versions name: Updates supported versions in documentation entry: ./scripts/ci/pre_commit/pre_commit_supported_versions.py @@ -422,11 +429,11 @@ repos: - id: check-no-relative-imports language: pygrep name: No relative imports - description: Airflow style is to use absolute imports only + description: Airflow style is to use absolute imports only (except docs building) entry: "^\\s*from\\s+\\." 
pass_filenames: true files: \.py$ - exclude: ^tests/|^airflow/_vendor/ + exclude: ^tests/|^airflow/_vendor/|^docs/ - id: check-for-inclusive-language language: pygrep name: Check for language that we do not accept as community @@ -449,6 +456,7 @@ repos: ^airflow/www/static/| ^airflow/providers/| ^tests/providers/apache/cassandra/hooks/test_cassandra.py$| + ^tests/system/providers/apache/spark/example_spark_dag.py$| ^docs/apache-airflow-providers-apache-cassandra/connections/cassandra.rst$| ^docs/apache-airflow-providers-apache-hive/commits.rst$| ^airflow/api_connexion/openapi/v1.yaml$| @@ -602,7 +610,7 @@ repos: - 'jsonschema>=3.2.0,<5.0.0' - 'tabulate==0.8.8' - 'jsonpath-ng==1.5.3' - - 'rich>=12.4.1' + - 'rich>=12.4.4' - id: check-pre-commit-information-consistent name: Update information re pre-commit hooks and verify ids and names entry: ./scripts/ci/pre_commit/pre_commit_check_pre_commit_hooks.py @@ -610,7 +618,7 @@ repos: - --max-length=64 language: python files: ^\.pre-commit-config\.yaml$|^scripts/ci/pre_commit/pre_commit_check_pre_commit_hook_names\.py$ - additional_dependencies: ['pyyaml', 'jinja2', 'black==22.3.0', 'tabulate', 'rich>=12.4.1'] + additional_dependencies: ['pyyaml', 'jinja2', 'black==22.3.0', 'tabulate', 'rich>=12.4.4'] require_serial: true pass_filenames: false - id: check-airflow-providers-have-extras @@ -620,7 +628,7 @@ repos: files: ^setup\.py$|^airflow/providers/.*\.py$ pass_filenames: false require_serial: true - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4'] - id: update-breeze-readme-config-hash name: Update Breeze README.md with config files hash language: python @@ -635,7 +643,7 @@ repos: files: ^dev/breeze/.*$ pass_filenames: false require_serial: true - additional_dependencies: ['click', 'rich>=12.4.1'] + additional_dependencies: ['click', 'rich>=12.4.4'] - id: check-system-tests-present name: Check if system tests have required segments of code entry: ./scripts/ci/pre_commit/pre_commit_check_system_tests.py @@ -643,7 +651,7 @@ repos: files: ^tests/system/.*/example_[^/]*.py$ exclude: ^tests/system/providers/google/bigquery/example_bigquery_queries\.py$ pass_filenames: true - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4'] - id: lint-markdown name: Run markdownlint description: Checks the style of Markdown files. 
@@ -734,7 +742,7 @@ repos: language: python pass_filenames: true files: ^\.github/workflows/.*\.yml$ - additional_dependencies: ['PyYAML', 'rich>=12.4.1'] + additional_dependencies: ['PyYAML', 'rich>=12.4.4'] - id: check-docstring-param-types name: Check that docstrings do not specify param types entry: ./scripts/ci/pre_commit/pre_commit_docstring_param_type.py @@ -742,7 +750,7 @@ repos: pass_filenames: true files: \.py$ exclude: ^airflow/_vendor/ - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4'] - id: lint-chart-schema name: Lint chart/values.schema.json file entry: ./scripts/ci/pre_commit/pre_commit_chart_schema.py @@ -777,7 +785,7 @@ repos: entry: ./scripts/ci/pre_commit/pre_commit_mypy.py files: ^dev/.*\.py$ require_serial: true - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout'] - id: run-mypy name: Run mypy for core language: python @@ -785,14 +793,14 @@ repos: files: \.py$ exclude: ^provider_packages|^docs|^airflow/_vendor/|^airflow/providers|^airflow/migrations|^dev require_serial: true - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout'] - id: run-mypy name: Run mypy for providers language: python entry: ./scripts/ci/pre_commit/pre_commit_mypy.py --namespace-packages files: ^airflow/providers/.*\.py$ require_serial: true - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout'] - id: run-mypy name: Run mypy for /docs/ folder language: python @@ -800,7 +808,7 @@ repos: files: ^docs/.*\.py$ exclude: ^docs/rtd-deprecation require_serial: true - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout'] - id: run-flake8 name: Run flake8 language: python @@ -808,7 +816,7 @@ repos: files: \.py$ pass_filenames: true exclude: ^airflow/_vendor/ - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout'] - id: lint-javascript name: ESLint against airflow/ui language: python @@ -816,7 +824,7 @@ repos: files: ^airflow/ui/ entry: ./scripts/ci/pre_commit/pre_commit_ui_lint.py pass_filenames: false - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout'] - id: lint-javascript name: ESLint against current UI JavaScript files language: python @@ -824,12 +832,12 @@ repos: files: ^airflow/www/static/js/ entry: ./scripts/ci/pre_commit/pre_commit_www_lint.py pass_filenames: false - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout'] - id: update-migration-references name: Update migration ref doc language: python entry: ./scripts/ci/pre_commit/pre_commit_migration_reference.py pass_filenames: false files: ^airflow/migrations/versions/.*\.py$|^docs/apache-airflow/migrations-ref\.rst$ - additional_dependencies: ['rich>=12.4.1'] + additional_dependencies: ['rich>=12.4.4', 'inputimeout'] ## ONLY ADD PRE-COMMITS HERE THAT REQUIRE CI IMAGE diff --git a/BREEZE.rst b/BREEZE.rst index 1ddd32dfbefce..12bdeda0e9ffe 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -426,6 +426,17 @@ of help of the commands only when they change. :width: 100% :alt: Breeze command-hash-export +Regenerating images for documentation +===================================== + +This documentation contains exported images with "help" of their commands and parameters. 
You can +regenerate all those images (which might be needed in case new version of rich is used) via +``regenerate-breeze-images`` command. + +.. image:: ./images/breeze/output-regenerate-command-images.svg + :width: 100% + :alt: Breeze regenerate-command-images + Starting complete Airflow installation ====================================== @@ -545,13 +556,17 @@ Configuration and maintenance * Cleanup breeze with ``breeze cleanup`` command * Self-upgrade breeze with ``breeze self-upgrade`` command * Setup autocomplete for Breeze with ``breeze setup-autocomplete`` command -* Checking available resources for docker with ``breeze resource-check`` command -* Freeing space needed to run CI tests with ``breeze free-space`` command -* Fixing ownership of files in your repository with ``breeze fix-ownership`` command * Print Breeze version with ``breeze version`` command * Outputs hash of commands defined by ``breeze`` with ``command-hash-export`` (useful to avoid needless regeneration of Breeze images) +CI tasks +-------- +* Freeing space needed to run CI tests with ``breeze free-space`` command +* Fixing ownership of files in your repository with ``breeze fix-ownership`` command +* Checking available resources for docker with ``breeze resource-check`` command +* Deciding which tests should be run with ``breeze selective-check`` command + Release tasks ------------- @@ -1275,8 +1290,8 @@ command but it is very similar to current ``breeze`` command): -Resource check -============== +Running resource check +---------------------- Breeze requires certain resources to be available - disk, memory, CPU. When you enter Breeze's shell, the resources are checked and information if there is enough resources is displayed. However you can @@ -1290,7 +1305,7 @@ Those are all available flags of ``resource-check`` command: Freeing the space -================= +----------------- When our CI runs a job, it needs all memory and disk it can have. We have a Breeze command that frees the memory and disk space used. You can also use it clear space locally but it performs a few operations @@ -1303,8 +1318,26 @@ Those are all available flags of ``free-space`` command: :alt: Breeze free-space +Selective check +--------------- + +When our CI runs a job, it needs to decide which tests to run, whether to build images and how much the test +should be run on multiple combinations of Python, Kubernetes, Backend versions. In order to optimize time +needed to run the CI Builds. You can also use the tool to test what tests will be run when you provide +a specific commit that Breeze should run the tests on. + +More details about the algorithm used to pick the right tests can be +found in `Selective Checks `_. + +Those are all available flags of ``selective-check`` command: + +.. image:: ./images/breeze/output-selective-check.svg + :width: 100% + :alt: Breeze selective-check + + Tracking backtracking issues for CI builds -========================================== +------------------------------------------ When our CI runs a job, we automatically upgrade our dependencies in the ``main`` build. However, this might lead to conflicts and ``pip`` backtracking for a long time (possibly forever) for dependency resolution. diff --git a/CI.rst b/CI.rst index f24639271e977..c058598449299 100644 --- a/CI.rst +++ b/CI.rst @@ -96,7 +96,7 @@ You can read more about Breeze in `BREEZE.rst `_ but in essence it i you to re-create CI environment in your local development instance and interact with it. 
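For example, a minimal sketch (using only flags that already appear elsewhere in this documentation; the Python version and backend are placeholders) of entering a Breeze shell that matches a particular CI combination:

.. code-block:: bash

    # Recreate the CI environment for one Python/backend combination and get an interactive shell
    breeze --python 3.8 --backend postgres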
In its basic form, when you do development you can run all the same tests that will be run in CI - but locally, before you submit them as PR. Another use case where Breeze is useful is when tests fail on CI. You can -take the full ``COMMIT_SHA`` of the failed build pass it as ``--github-image-id`` parameter of Breeze and it will +take the full ``COMMIT_SHA`` of the failed build pass it as ``--image-tag`` parameter of Breeze and it will download the very same version of image that was used in CI and run it locally. This way, you can very easily reproduce any failed test that happens in CI - even if you do not check out the sources connected with the run. @@ -275,7 +275,7 @@ You can use those variables when you try to reproduce the build locally. | | | | | should set it to false, especially | | | | | | in case our local sources are not the | | | | | | ones we intend to use (for example | -| | | | | when ``--github-image-id`` is used | +| | | | | when ``--image-tag`` is used | | | | | | in Breeze. | | | | | | | | | | | | In CI jobs it is set to true | @@ -426,12 +426,6 @@ CI, Production Images as well as base Python images that are also cached in the Also for those builds we only execute Python tests if important files changed (so for example if it is "no-code" change, no tests will be executed. -The workflow involved in Pull Requests review and approval is a bit more complex than simple workflows -in most of other projects because we've implemented some optimizations related to efficient use -of queue slots we share with other Apache Software Foundation projects. More details about it -can be found in `PULL_REQUEST_WORKFLOW.rst `_. - - Direct Push/Merge Run --------------------- @@ -668,12 +662,11 @@ For example knowing that the CI job was for commit ``cd27124534b46c9688a1d89e75f But you usually need to pass more variables and complex setup if you want to connect to a database or enable some integrations. Therefore it is easiest to use `Breeze `_ for that. For example if -you need to reproduce a MySQL environment with kerberos integration enabled for commit -cd27124534b46c9688a1d89e75fcd137ab5137e3, in python 3.8 environment you can run: +you need to reproduce a MySQL environment in python 3.8 environment you can run: .. code-block:: bash - ./breeze-legacy --github-image-id cd27124534b46c9688a1d89e75fcd137ab5137e3 --python 3.8 + breeze --image-tag cd27124534b46c9688a1d89e75fcd137ab5137e3 --python 3.8 --backend mysql You will be dropped into a shell with the exact version that was used during the CI run and you will be able to run pytest tests manually, easily reproducing the environment that was used in CI. Note that in diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index f862d33b1963a..1904c15abe6e7 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -360,33 +360,6 @@ Step 4: Prepare PR PR guidelines described in `pull request guidelines <#pull-request-guidelines>`_. Create Pull Request! Make yourself ready for the discussion! -5. Depending on "scope" of your changes, your Pull Request might go through one of few paths after approval. - We run some non-standard workflow with high degree of automation that allows us to optimize the usage - of queue slots in GitHub Actions. Our automated workflows determine the "scope" of changes in your PR - and send it through the right path: - - * In case of a "no-code" change, approval will generate a comment that the PR can be merged and no - tests are needed. 
This is usually when the change modifies some non-documentation related RST - files (such as this file). No python tests are run and no CI images are built for such PR. Usually - it can be approved and merged few minutes after it is submitted (unless there is a big queue of jobs). - - * In case of change involving python code changes or documentation changes, a subset of full test matrix - will be executed. This subset of tests perform relevant tests for single combination of python, backend - version and only builds one CI image and one PROD image. Here the scope of tests depends on the - scope of your changes: - - * when your change does not change "core" of Airflow (Providers, CLI, WWW, Helm Chart) you will get the - comment that PR is likely ok to be merged without running "full matrix" of tests. However decision - for that is left to committer who approves your change. The committer might set a "full tests needed" - label for your PR and ask you to rebase your request or re-run all jobs. PRs with "full tests needed" - run full matrix of tests. - - * when your change changes the "core" of Airflow you will get the comment that PR needs full tests and - the "full tests needed" label is set for your PR. Additional check is set that prevents from - accidental merging of the request until full matrix of tests succeeds for the PR. - - More details about the PR workflow be found in `PULL_REQUEST_WORKFLOW.rst `_. - Step 5: Pass PR Review ---------------------- diff --git a/Dockerfile b/Dockerfile index 39049dcb4d4ea..ce9b7d5b414df 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1338,7 +1338,7 @@ ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS} \ AIRFLOW_IS_IN_CONTEXT=${AIRFLOW_IS_IN_CONTEXT} \ EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} -WORKDIR /opt/airflow +WORKDIR ${AIRFLOW_HOME} COPY --from=scripts install_from_docker_context_files.sh install_airflow.sh \ install_additional_dependencies.sh /scripts/docker/ @@ -1473,7 +1473,7 @@ COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh /scrip # We run scripts with bash here to make sure we can execute the scripts. Changing to +x might have an # unexpected result - the cache for Dockerfiles might get invalidated in case the host system # had different umask set and group x bit was not set. In Azure the bit might be not set at all. 
-# That also protects against AUFS Docker backen dproblem where changing the executable bit required sync +# That also protects against AUFS Docker backend problem where changing the executable bit required sync RUN bash /scripts/docker/install_mysql.sh prod \ && bash /scripts/docker/install_mssql.sh \ && bash /scripts/docker/install_postgres.sh prod \ diff --git a/Dockerfile.ci b/Dockerfile.ci index 41aaf579ec749..337901d4a817b 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -686,9 +686,15 @@ if [[ ${SKIP_ENVIRONMENT_INITIALIZATION=} != "true" ]]; then echo "${COLOR_BLUE}Uninstalling airflow and providers" echo uninstall_airflow_and_providers - echo "${COLOR_BLUE}Install airflow from wheel package with extras: '${AIRFLOW_EXTRAS}' and constraints reference ${AIRFLOW_CONSTRAINTS_REFERENCE}.${COLOR_RESET}" - echo - install_airflow_from_wheel "${AIRFLOW_EXTRAS}" "${AIRFLOW_CONSTRAINTS_REFERENCE}" + if [[ ${SKIP_CONSTRAINTS,,=} == "true" ]]; then + echo "${COLOR_BLUE}Install airflow from wheel package with extras: '${AIRFLOW_EXTRAS}' with no constraints.${COLOR_RESET}" + echo + install_airflow_from_wheel "${AIRFLOW_EXTRAS}" "none" + else + echo "${COLOR_BLUE}Install airflow from wheel package with extras: '${AIRFLOW_EXTRAS}' and constraints reference ${AIRFLOW_CONSTRAINTS_REFERENCE}.${COLOR_RESET}" + echo + install_airflow_from_wheel "${AIRFLOW_EXTRAS}" "${AIRFLOW_CONSTRAINTS_REFERENCE}" + fi uninstall_providers elif [[ ${USE_AIRFLOW_VERSION} == "sdist" ]]; then echo @@ -696,9 +702,15 @@ if [[ ${SKIP_ENVIRONMENT_INITIALIZATION=} != "true" ]]; then echo uninstall_airflow_and_providers echo - echo "${COLOR_BLUE}Install airflow from sdist package with extras: '${AIRFLOW_EXTRAS}' and constraints reference ${AIRFLOW_CONSTRAINTS_REFERENCE}.${COLOR_RESET}" - echo - install_airflow_from_sdist "${AIRFLOW_EXTRAS}" "${AIRFLOW_CONSTRAINTS_REFERENCE}" + if [[ ${SKIP_CONSTRAINTS,,=} == "true" ]]; then + echo "${COLOR_BLUE}Install airflow from sdist package with extras: '${AIRFLOW_EXTRAS}' with no constraints.${COLOR_RESET}" + echo + install_airflow_from_sdist "${AIRFLOW_EXTRAS}" "none" + else + echo "${COLOR_BLUE}Install airflow from sdist package with extras: '${AIRFLOW_EXTRAS}' and constraints reference ${AIRFLOW_CONSTRAINTS_REFERENCE}.${COLOR_RESET}" + echo + install_airflow_from_sdist "${AIRFLOW_EXTRAS}" "${AIRFLOW_CONSTRAINTS_REFERENCE}" + fi uninstall_providers else echo @@ -706,9 +718,15 @@ if [[ ${SKIP_ENVIRONMENT_INITIALIZATION=} != "true" ]]; then echo uninstall_airflow_and_providers echo - echo "${COLOR_BLUE}Install released airflow from PyPI with extras: '${AIRFLOW_EXTRAS}' and constraints reference ${AIRFLOW_CONSTRAINTS_REFERENCE}.${COLOR_RESET}" - echo - install_released_airflow_version "${USE_AIRFLOW_VERSION}" "${AIRFLOW_CONSTRAINTS_REFERENCE}" + if [[ ${SKIP_CONSTRAINTS,,=} == "true" ]]; then + echo "${COLOR_BLUE}Install released airflow from PyPI with extras: '${AIRFLOW_EXTRAS}' with no constraints.${COLOR_RESET}" + echo + install_released_airflow_version "${USE_AIRFLOW_VERSION}" "none" + else + echo "${COLOR_BLUE}Install released airflow from PyPI with extras: '${AIRFLOW_EXTRAS}' and constraints reference ${AIRFLOW_CONSTRAINTS_REFERENCE}.${COLOR_RESET}" + echo + install_released_airflow_version "${USE_AIRFLOW_VERSION}" "${AIRFLOW_CONSTRAINTS_REFERENCE}" + fi fi if [[ ${USE_PACKAGES_FROM_DIST=} == "true" ]]; then echo @@ -803,7 +821,7 @@ if [[ ${SKIP_ENVIRONMENT_INITIALIZATION=} != "true" ]]; then cd "${AIRFLOW_SOURCES}" if [[ ${START_AIRFLOW:="false"} == "true" || ${START_AIRFLOW} == 
"True" ]]; then - export AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=${LOAD_DEFAULT_CONNECTIONS} + export AIRFLOW__DATABASE__LOAD_DEFAULT_CONNECTIONS=${LOAD_DEFAULT_CONNECTIONS} export AIRFLOW__CORE__LOAD_EXAMPLES=${LOAD_EXAMPLES} # shellcheck source=scripts/in_container/bin/run_tmux exec run_tmux diff --git a/IMAGES.rst b/IMAGES.rst index 58ef0cca54852..634c4ac8a0e12 100644 --- a/IMAGES.rst +++ b/IMAGES.rst @@ -281,7 +281,7 @@ to refresh them. Every developer can also pull and run images being result of a specific CI run in GitHub Actions. This is a powerful tool that allows to reproduce CI failures locally, enter the images and fix them much -faster. It is enough to pass ``--github-image-id`` and the registry and Breeze will download and execute +faster. It is enough to pass ``--image-tag`` and the registry and Breeze will download and execute commands using the same image that was used during the CI tests. For example this command will run the same Python 3.8 image as was used in build identified with @@ -289,8 +289,7 @@ For example this command will run the same Python 3.8 image as was used in build .. code-block:: bash - ./breeze-legacy --github-image-id 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e \ - --python 3.8 --integration rabbitmq + breeze --image-tag 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e --python 3.8 --integration rabbitmq You can see more details and examples in `Breeze `_ diff --git a/NOTICE b/NOTICE index 4c7b795d88ce6..84c77cd4fc12c 100644 --- a/NOTICE +++ b/NOTICE @@ -20,3 +20,10 @@ This product contains a modified portion of 'Flask App Builder' developed by Dan (https://github.com/dpgaspar/Flask-AppBuilder). * Copyright 2013, Daniel Vaz Gaspar + +Chakra UI: +----- +This product contains a modified portion of 'Chakra UI' developed by Segun Adebayo. +(https://github.com/chakra-ui/chakra-ui). + +* Copyright 2019, Segun Adebayo diff --git a/PULL_REQUEST_WORKFLOW.rst b/PULL_REQUEST_WORKFLOW.rst deleted file mode 100644 index d7ca2f9b93eaa..0000000000000 --- a/PULL_REQUEST_WORKFLOW.rst +++ /dev/null @@ -1,158 +0,0 @@ - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -.. contents:: :local: - -Why non-standard pull request workflow? ---------------------------------------- - -This document describes the Pull Request Workflow we've implemented in Airflow. 
The workflow is slightly -more complex than regular workflow you might encounter in most of the projects because after experiencing -some huge delays in processing queues in October 2020 with GitHub Actions, we've decided to optimize the -workflow to minimize the use of GitHub Actions build time by utilising selective approach on which tests -and checks in the CI system are run depending on analysis of which files changed in the incoming PR and -allowing the Committers to control the scope of the tests during the approval/review process. - -Just to give a bit of context, we started off with the approach that we always run all tests for all the -incoming PRs, however due to our matrix of tests growing, this approach did not scale with the increasing -number of PRs and when we had to compete with other Apache Software Foundation projects for the 180 -slots that are available for the whole organization. More Apache Software Foundation projects started -to use GitHub Actions and we've started to experience long queues when our jobs waited for free slots. - -We approached the problem by: - -1) Improving mechanism of cancelling duplicate workflow runs more efficiently in case of queue conditions - (duplicate workflow runs are generated when someone pushes a fixup quickly - leading to running both - out-dated and current run to completion, taking precious slots. This has been implemented by improving - `cancel-workflow-run `_ action we are using. In version - 4.1 it got a new feature of cancelling all duplicates even if there is a long queue of builds. - -2) Heavily decreasing strain on the GitHub Actions jobs by introducing selective checks - mechanism - to control which parts of the tests are run during the tests. This is implemented by the - ``scripts/ci/selective_ci_checks.sh`` script in our repository. This script analyses which part of the - code has changed and based on that it sets the right outputs that control which tests are executed in - the ``Tests`` workflow, and whether we need to build CI images necessary to run those steps. This allowed to - heavily decrease the strain especially for the Pull Requests that were not touching code (in which case - the builds can complete in < 2 minutes) but also by limiting the number of tests executed in PRs that do - not touch the "core" of Airflow, or only touching some - standalone - parts of Airflow such as - "Providers", "WWW" or "CLI". This solution is not yet perfect as there are likely some edge cases but - it is easy to maintain and we have an escape-hatch - all the tests are always executed in main pushes, - so contributors can easily spot if there is a "missed" case and fix it - both by fixing the problem and - adding those exceptions to the code. More about it can be found in `Selective checks `_ - -3) Even more optimisation came from limiting the scope of tests to only "default" matrix parameters. So far - in Airflow we always run all tests for all matrix combinations. The primary matrix components are: - - * Python versions (currently 3.7, 3.8, 3.9, 3.10) - * Backend types (currently MySQL/Postgres) - * Backed version (currently MySQL 5.7, MySQL 8, Postgres 13 - - We've decided that instead of running all the combinations of parameters for all matrix component we will - only run default values (Python 3.7, Mysql 5.7, Postgres 13) for all PRs which are not approved yet by - the committers. 
This has a nice effect, that full set of tests (though with limited combinations of - the matrix) are still run in the CI for every Pull Request that needs tests at all - allowing the - contributors to make sure that their PR is "good enough" to be reviewed. - - Even after approval, the automated workflows we've implemented, check if the PR seems to need - "full test matrix" and provide helpful information to both contributors and committers in the form of - explanatory comments and labels set automatically showing the status of the PR. Committers have still - control whether they want to merge such requests automatically or ask for rebase or re-run the tests - and run "full tests" by applying the "full tests needed" label and re-running such request. - The "full tests needed" label is also applied automatically after approval when the change touches - the "core" of Airflow - also a separate check is added to the PR so that the "merge" button status - will indicate to the committer that full tests are still needed. The committer might still decide, - whether to merge such PR without the "full matrix". The "escape hatch" we have - i.e. running the full - matrix of tests in the "merge push" will enable committers to catch and fix such problems quickly. - More about it can be found in `Approval workflow and Matrix tests <#approval-workflow-and-matrix-tests>`_ - chapter. - -4) We've also applied (and received) funds to run self-hosted runners. They are used for ``main`` runs - and whenever the PRs are done by one of the maintainers. Maintainers can force using Public GitHub runners - by applying "use public runners" label to the PR before submitting it. - - -Approval Workflow and Matrix tests ----------------------------------- - -As explained above the approval and matrix tests workflow works according to the algorithm below: - -1) In case of "no-code" changes - so changes that do not change any of the code or environment of - the application, no test are run (this is done via selective checks). Also no CI/PROD images are - build saving extra minutes. Such build takes less than 2 minutes currently and only few jobs are run - which is a very small fraction of the "full build" time. - -2) When new PR is created, only a "default set" of matrix test are running. Only default - values for each of the parameters are used effectively limiting it to running matrix builds for only - one python version and one version of each of the backends. In this case only one CI and one PROD - image is built, saving precious job slots. This build takes around 50% less time than the "full matrix" - build. - -3) When such PR gets approved, the system further analyses the files changed in this PR and further - decision is made that should be communicated to both Committer and Reviewer. - -3a) In case of "no-code" builds, a message is communicated that the PR is ready to be merged and - no tests are needed. - -.. image:: images/pr/pr-no-tests-needed-comment.png - :align: center - :alt: No tests needed for "no-code" builds - -3b) In case of "non-core" builds a message is communicated that such PR is likely OK to be merged as is with - limited set of tests, but that the committer might decide to re-run the PR after applying - "full tests needed" label, which will trigger full matrix build for tests for this PR. The committer - might make further decision on what to do with this PR. - -.. 
image:: images/pr/pr-likely-ok-to-merge.png - :align: center - :alt: Likely ok to merge the PR with only small set of tests - -3c) In case of "core" builds (i. e. when the PR touches some "core" part of Airflow) a message is - communicated that this PR needs "full test matrix", the "full tests needed" label is applied - automatically and either the contributor might rebase the request to trigger full test build or the - committer might re-run the build manually to trigger such full test rebuild. Also a check "in-progress" - is added, so that the committer realises that the PR is not yet "green to merge". Pull requests with - "full tests needed" label always trigger the full matrix build when rebased or re-run so if the - PR gets rebased, it will continue triggering full matrix build. - -.. image:: images/pr/pr-full-tests-needed.png - :align: center - :alt: Full tests are needed for the PR - -4) If this or another committer "request changes" in a previously approved PR with "full tests needed" - label, the bot automatically removes the label, moving it back to "run only default set of parameters" - mode. For PRs touching core of airflow once the PR gets approved back, the label will be restored. - If it was manually set by the committer, it has to be restored manually. - -.. note:: Note that setting the labels and adding comments might be delayed, due to limitation of GitHub Actions, - in case of queues, processing of Pull Request reviews might take some time, so it is advised not to merge - PR immediately after approval. Luckily, the comments describing the status of the PR trigger notifications - for the PRs and they provide good "notification" for the committer to act on a PR that was recently - approved. - -The PR approval workflow is possible thanks to two custom GitHub Actions we've developed: - -* `Get workflow origin `_ -* `Label when approved `_ - - -Next steps ----------- - -We are planning to also propose the approach to other projects from Apache Software Foundation to -make it a common approach, so that our effort is not limited only to one project. - -Discussion about it in `this discussion `_ diff --git a/README.md b/README.md index 3f38a69808222..bc2957eb9e41a 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ Airflow is not a streaming solution, but it is often used to process real-time d Apache Airflow is tested with: -| | Main version (dev) | Stable version (2.3.2) | +| | Main version (dev) | Stable version (2.3.3) | |---------------------|------------------------------|------------------------------| | Python | 3.7, 3.8, 3.9, 3.10 | 3.7, 3.8, 3.9, 3.10 | | Platform | AMD64/ARM64(\*) | AMD64/ARM64(\*) | @@ -160,15 +160,15 @@ them to the appropriate format and workflow that your tool requires. ```bash -pip install 'apache-airflow==2.3.2' \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.2/constraints-3.7.txt" +pip install 'apache-airflow==2.3.3' \ + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.3/constraints-3.7.txt" ``` 2. 
Installing with extras (i.e., postgres, google) ```bash -pip install 'apache-airflow[postgres,google]==2.3.2' \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.2/constraints-3.7.txt" +pip install 'apache-airflow[postgres,google]==2.3.3' \ + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.3/constraints-3.7.txt" ``` For information on installing provider packages, check @@ -273,7 +273,7 @@ Apache Airflow version life cycle: | Version | Current Patch/Minor | State | First Release | Limited Support | EOL/Terminated | |-----------|-----------------------|-----------|-----------------|-------------------|------------------| -| 2 | 2.3.2 | Supported | Dec 17, 2020 | TBD | TBD | +| 2 | 2.3.3 | Supported | Dec 17, 2020 | TBD | TBD | | 1.10 | 1.10.15 | EOL | Aug 27, 2018 | Dec 17, 2020 | June 17, 2021 | | 1.9 | 1.9.0 | EOL | Jan 03, 2018 | Aug 27, 2018 | Aug 27, 2018 | | 1.8 | 1.8.2 | EOL | Mar 19, 2017 | Jan 03, 2018 | Jan 03, 2018 | @@ -303,7 +303,7 @@ They are based on the official release schedule of Python and Kubernetes, nicely 2. The "oldest" supported version of Python/Kubernetes is the default one until we decide to switch to later version. "Default" is only meaningful in terms of "smoke tests" in CI PRs, which are run using this default version and the default reference image available. Currently `apache/airflow:latest` - and `apache/airflow:2.3.2` images are Python 3.7 images. This means that default reference image will + and `apache/airflow:2.3.3` images are Python 3.7 images. This means that default reference image will become the default at the time when we start preparing for dropping 3.7 support which is few months before the end of life for Python 3.7. diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 3ec086d249391..ed9ea874af846 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -21,7 +21,125 @@ .. towncrier release notes start -Airflow 2.3.2 (2021-06-04) + +Airflow 2.3.3 (2022-07-05) +-------------------------- + +Significant Changes +^^^^^^^^^^^^^^^^^^^ + +We've upgraded Flask App Builder to a major version 4.* (#24399) +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Flask App Builder is one of the important components of Airflow Webserver, as +it uses a lot of dependencies that are essential to run the webserver and integrate it +in enterprise environments - especially authentication. + +The FAB 4.* upgrades a number of dependencies to major releases, which upgrades them to versions +that have a number of security issues fixed. A lot of tests were performed to bring the dependencies +in a backwards-compatible way, however the dependencies themselves implement breaking changes in their +internals so it might be that some of those changes might impact the users in case they are using the +libraries for their own purposes. + +One important change that you likely will need to apply to Oauth configuration is to add +``server_metadata_url`` or ``jwks_uri`` and you can read about it more +in `this issue `_. 
+ +Here is the list of breaking changes in dependencies that comes together with FAB 4: + + * ``Flask`` from 1.X to 2.X `breaking changes `__ + + * ``flask-jwt-extended`` 3.X to 4.X `breaking changes: `__ + + * ``Jinja2`` 2.X to 3.X `breaking changes: `__ + + * ``Werkzeug`` 1.X to 2.X `breaking changes `__ + + * ``pyJWT`` 1.X to 2.X `breaking changes: `__ + + * ``Click`` 7.X to 8.X `breaking changes: `__ + + * ``itsdangerous`` 1.X to 2.X `breaking changes `__ + +Bug Fixes +^^^^^^^^^ + +- Allow for ``LOGGING_LEVEL=DEBUG`` (#23360) +- Fix grid date ticks (#24738) +- Debounce status highlighting in Grid view (#24710) +- Fix Grid vertical scrolling (#24684) +- don't try to render child rows for closed groups (#24637) +- Do not calculate grid root instances (#24528) +- Maintain grid view selection on filtering upstream (#23779) +- Speed up ``grid_data`` endpoint by 10x (#24284) +- Apply per-run log templates to log handlers (#24153) +- Don't crash scheduler if exec config has old k8s objects (#24117) +- ``TI.log_url`` fix for ``map_index`` (#24335) +- Fix migration ``0080_2_0_2`` - Replace null values before setting column not null (#24585) +- Patch ``sql_alchemy_conn`` if old Postgres schemes used (#24569) +- Seed ``log_template`` table (#24511) +- Fix deprecated ``log_id_template`` value (#24506) +- Fix toast messages (#24505) +- Add indexes for CASCADE deletes for ``task_instance`` (#24488) +- Return empty dict if Pod JSON encoding fails (#24478) +- Improve grid rendering performance with a custom tooltip (#24417, #24449) +- Check for ``run_id`` for grid group summaries (#24327) +- Optimize calendar view for cron scheduled DAGs (#24262) +- Use ``get_hostname`` instead of ``socket.getfqdn`` (#24260) +- Check that edge nodes actually exist (#24166) +- Fix ``useTasks`` crash on error (#24152) +- Do not fail re-queued TIs (#23846) +- Reduce grid view API calls (#24083) +- Rename Permissions to Permission Pairs. 
(#24065) +- Replace ``use_task_execution_date`` with ``use_task_logical_date`` (#23983) +- Grid fix details button truncated and small UI tweaks (#23934) +- Add TaskInstance State ``REMOVED`` to finished states and success states (#23797) +- Fix mapped task immutability after clear (#23667) +- Fix permission issue for dag that has dot in name (#23510) +- Fix closing connection ``dbapi.get_pandas_df`` (#23452) +- Check bag DAG ``schedule_interval`` match timetable (#23113) +- Parse error for task added to multiple groups (#23071) +- Fix flaky order of returned dag runs (#24405) +- Migrate ``jsx`` files that affect run/task selection to ``tsx`` (#24509) +- Fix links to sources for examples (#24386) +- Set proper ``Content-Type`` and ``chartset`` on ``grid_data`` endpoint (#24375) + +Doc only changes +^^^^^^^^^^^^^^^^ + +- Update templates doc to mention ``extras`` and format Airflow ``Vars`` / ``Conns`` (#24735) +- Document built in Timetables (#23099) +- Alphabetizes two tables (#23923) +- Clarify that users should not use Maria DB (#24556) +- Add imports to deferring code samples (#24544) +- Add note about image regeneration in June 2022 (#24524) +- Small cleanup of ``get_current_context()`` chapter (#24482) +- Fix default 2.2.5 ``log_id_template`` (#24455) +- Update description of installing providers separately from core (#24454) +- Mention context variables and logging (#24304) + +Misc/Internal +^^^^^^^^^^^^^ + +- Remove internet explorer support (#24495) +- Removing magic status code numbers from ``api_connexion`` (#24050) +- Upgrade FAB to ``4.1.2`` (#24619) +- Switch Markdown engine to ``markdown-it-py`` (#19702) +- Update ``rich`` to latest version across the board. (#24186) +- Get rid of ``TimedJSONWebSignatureSerializer`` (#24519) +- Update flask-appbuilder ``authlib``/ ``oauth`` dependency (#24516) +- Upgrade to ``webpack`` 5 (#24485) +- Add ``typescript`` (#24337) +- The JWT claims in the request to retrieve logs have been standardized: we use ``nbf`` and ``aud`` claims for + maturity and audience of the requests. Also "filename" payload field is used to keep log name. (#24519) +- Address all ``yarn`` test warnings (#24722) +- Upgrade to react 18 and chakra 2 (#24430) +- Refactor ``DagRun.verify_integrity`` (#24114) +- Upgrade FAB to ``4.1.1`` (#24399) +- We now need at least ``Flask-WTF 0.15`` (#24621) + + +Airflow 2.3.2 (2022-06-04) -------------------------- No significant changes @@ -256,7 +374,7 @@ containing your previous ``log_id_template`` and ``log_filename_template``. For .. code-block:: sql - INSERT INTO log_template (id, filename, elasticsearch_id, created_at) VALUES (0, '{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log', '{dag_id}_{task_id}_{run_id}_{try_number}', NOW()); + INSERT INTO log_template (id, filename, elasticsearch_id, created_at) VALUES (0, '{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log', '{dag_id}-{task_id}-{execution_date}-{try_number}', NOW()); BaseOperatorLink's ``get_link`` method changed to take a ``ti_key`` keyword argument (#21798) """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" @@ -704,6 +822,7 @@ Misc/Internal - Remove deprecated usage of ``init_role()`` from API (#18820) - Remove duplicate code on dbapi hook (#18821) + Airflow 2.2.5, (2022-04-04) --------------------------- @@ -6866,9 +6985,7 @@ New signature: .. 
code-block:: python - def wait_for_transfer_job( - self, job, expected_statuses=(GcpTransferOperationStatus.SUCCESS,) - ): + def wait_for_transfer_job(self, job, expected_statuses=(GcpTransferOperationStatus.SUCCESS,)): ... The behavior of ``wait_for_transfer_job`` has changed: @@ -7836,7 +7953,6 @@ There are five roles created for Airflow by default: Admin, User, Op, Viewer, an Breaking changes ~~~~~~~~~~~~~~~~ - * AWS Batch Operator renamed property queue to job_queue to prevent conflict with the internal queue from CeleryExecutor - AIRFLOW-2542 * Users created and stored in the old users table will not be migrated automatically. FAB's built-in authentication support must be reconfigured. * Airflow dag home page is now ``/home`` (instead of ``/admin``\ ). diff --git a/SELECTIVE_CHECKS.md b/SELECTIVE_CHECKS.md deleted file mode 100644 index 3a92d9c817987..0000000000000 --- a/SELECTIVE_CHECKS.md +++ /dev/null @@ -1,144 +0,0 @@ - - -# Selective CI Checks - -In order to optimise our CI jobs, we've implemented optimisations to only run selected checks for some -kind of changes. The logic implemented reflects the internal architecture of Airflow 2.0 packages -and it helps to keep down both the usage of jobs in GitHub Actions as well as CI feedback time to -contributors in case of simpler changes. - -We have the following test types (separated by packages in which they are): - -* Always - those are tests that should be always executed (always folder) -* Core - for the core Airflow functionality (core folder) -* API - Tests for the Airflow API (api and api_connexion folders) -* CLI - Tests for the Airflow CLI (cli folder) -* WWW - Tests for the Airflow webserver (www folder) -* Providers - Tests for all Providers of Airflow (providers folder) -* Other - all other tests (all other folders that are not part of any of the above) - -We also have several special kinds of tests that are not separated by packages but they are marked with -pytest markers. They can be found in any of those packages and they can be selected by the appropriate -pytest custom command line options. See `TESTING.rst `_ for details but those are: - -* Integration - tests that require external integration images running in docker-compose -* Quarantined - tests that are flaky and need to be fixed -* Postgres - tests that require Postgres database. They are only run when backend is Postgres -* MySQL - tests that require MySQL database. They are only run when backend is MySQL - -Even if the types are separated, In case they share the same backend version/python version, they are -run sequentially in the same job, on the same CI machine. Each of them in a separate `docker run` command -and with additional docker cleaning between the steps to not fall into the trap of exceeding resource -usage in one big test run, but also not to increase the number of jobs per each Pull Request. - -The logic implemented for the changes works as follows: - -1) In case of direct push (so when PR gets merged) or scheduled run, we always run all tests and checks. - This is in order to make sure that the merge did not miss anything important. The remainder of the logic - is executed only in case of Pull Requests. We do not add providers tests in case DEFAULT_BRANCH is - different than main, because providers are only important in main branch and PRs to main branch. 
- -2) We retrieve which files have changed in the incoming Merge Commit (github.sha is a merge commit - automatically prepared by GitHub in case of Pull Request, so we can retrieve the list of changed - files from that commit directly). - -3) If any of the important, environment files changed (Dockerfile, ci scripts, setup.py, GitHub workflow - files), then we again run all tests and checks. Those are cases where the logic of the checks changed - or the environment for the checks changed so we want to make sure to check everything. We do not add - providers tests in case DEFAULT_BRANCH is different than main, because providers are only - important in main branch and PRs to main branch. - -4) If any of py files changed: we need to have CI image and run full static checks so we enable image building - -5) If any of docs changed: we need to have CI image so we enable image building - -6) If any of chart files changed, we need to run helm tests so we enable helm unit tests - -7) If any of API files changed, we need to run API tests so we enable them - -8) If any of the relevant source files that trigger the tests have changed at all. Those are airflow - sources, chart, tests and kubernetes_tests. If any of those files changed, we enable tests and we - enable image building, because the CI images are needed to run tests. - -9) Then we determine which types of the tests should be run. We count all the changed files in the - relevant airflow sources (airflow, chart, tests, kubernetes_tests) first and then we count how many - files changed in different packages: - - * in any case tests in `Always` folder are run. Those are special tests that should be run any time - modifications to any Python code occurs. Example test of this type is verifying proper structure of - the project including proper naming of all files. - * if any of the Airflow API files changed we enable `API` test type - * if any of the Airflow CLI files changed we enable `CLI` test type and Kubernetes tests (the - K8S tests depend on CLI changes as helm chart uses CLI to run Airflow). - * if this is a main branch and if any of the Provider files changed we enable `Providers` test type - * if any of the WWW files changed we enable `WWW` test type - * if any of the Kubernetes files changed we enable `Kubernetes` test type - * Then we subtract count of all the `specific` above per-type changed files from the count of - all changed files. In case there are any files changed, then we assume that some unknown files - changed (likely from the core of airflow) and in this case we enable all test types above and the - Core test types - simply because we do not want to risk to miss anything. - * In all cases where tests are enabled we also add Integration and - depending on - the backend used = Postgres or MySQL types of tests. - -10) Quarantined tests are always run when tests are run - we need to run them often to observe how - often they fail so that we can decide to move them out of quarantine. Details about the - Quarantined tests are described in `TESTING.rst `_ - -11) There is a special case of static checks. In case the above logic determines that the CI image - needs to be built, we run long and more comprehensive version of static checks - including - Mypy, Flake8. And those tests are run on all files, no matter how many files changed. 
- In case the image is not built, we run only simpler set of changes - the longer static checks - that require CI image are skipped, and we only run the tests on the files that changed in the incoming - commit - unlike flake8/mypy, those static checks are per-file based and they should not miss any - important change. - -Similarly to selective tests we also run selective security scans. In Pull requests, -the Python scan will only run when there is a python code change and JavaScript scan will only run if -there is a JavaScript or `yarn.lock` file change. For main builds, all scans are always executed. - -The selective check algorithm is shown here: - - -````mermaid -flowchart TD -A(PR arrives)-->B[Selective Check] -B-->C{Direct push merge?} -C-->|Yes| N[Enable images] -N-->D(Run Full Test
<br>+Quarantined<br>Run full static checks) -C-->|No| E[Retrieve changed files] -E-->F{Environment files changed?} -F-->|Yes| N -F-->|No| G{Docs changed} -G-->|Yes| O[Enable images building] -O-->I{Chart files changed?} -G-->|No| I -I-->|Yes| P[Enable helm tests] -P-->J{API files changed} -I-->|No| J -J-->|Yes| Q[Enable API tests] -Q-->H{Sources changed?} -J-->|No| H -H-->|Yes| R[Enable Pytests] -R-->K[Determine test type] -K-->S{Core files changed} -S-->|Yes| N -S-->|No| M(Run selected test+<br>Integration, Quarantined<br>Full static checks) -H-->|No| L[Skip running test<br>
Run subset of static checks] -``` diff --git a/STATIC_CODE_CHECKS.rst b/STATIC_CODE_CHECKS.rst index e2b57eae6c69b..b5b5e37ecb8d4 100644 --- a/STATIC_CODE_CHECKS.rst +++ b/STATIC_CODE_CHECKS.rst @@ -136,7 +136,7 @@ require Breeze Docker image to be build locally. +--------------------------------------------------------+------------------------------------------------------------------+---------+ | blacken-docs | Run black on python code blocks in documentation files | | +--------------------------------------------------------+------------------------------------------------------------------+---------+ -| check-airflow-2-1-compatibility | Check that providers are 2.1 compatible. | | +| check-airflow-2-2-compatibility | Check that providers are 2.2 compatible. | | +--------------------------------------------------------+------------------------------------------------------------------+---------+ | check-airflow-config-yaml-consistent | Checks for consistency between config.yml and default_config.cfg | | +--------------------------------------------------------+------------------------------------------------------------------+---------+ @@ -286,7 +286,7 @@ require Breeze Docker image to be build locally. +--------------------------------------------------------+------------------------------------------------------------------+---------+ | trailing-whitespace | Remove trailing whitespace at end of line | | +--------------------------------------------------------+------------------------------------------------------------------+---------+ -| update-breeze-file | Update output of breeze commands in BREEZE.rst | | +| update-breeze-cmd-output | Update output of breeze commands in BREEZE.rst | | +--------------------------------------------------------+------------------------------------------------------------------+---------+ | update-breeze-readme-config-hash | Update Breeze README.md with config files hash | | +--------------------------------------------------------+------------------------------------------------------------------+---------+ diff --git a/TESTING.rst b/TESTING.rst index 12983726e1ebb..4f7de58b76766 100644 --- a/TESTING.rst +++ b/TESTING.rst @@ -182,6 +182,21 @@ You can also specify individual tests or a group of tests: breeze tests --db-reset tests/core/test_core.py::TestCore +You can also limit the tests to execute to specific group of tests + +.. code-block:: bash + + breeze tests --test-type Core + + +You can also write tests in "limited progress" mode (useful in the future to run CI). In this mode each +test just prints "percentage" summary of the run as single line and only dumps full output of the test +after it completes. + +.. code-block:: bash + + breeze tests --test-type Core --limit-progress-output + Running Tests of a specified type from the Host ----------------------------------------------- @@ -472,8 +487,6 @@ This is done for three reasons: 1. in order to selectively run only subset of the test types for some PRs 2. in order to allow parallel execution of the tests on Self-Hosted runners -For case 1. see `Pull Request Workflow `_ for details. - For case 2. We can utilise memory and CPUs available on both CI and local development machines to run test in parallel. This way we can decrease the time of running all tests in self-hosted runners from 60 minutes to ~15 minutes. 
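To make the new ``breeze tests`` flags concrete, here is a minimal local sketch that mimics CI running several test types one after another with condensed output (the test type names come from the test-type list documented in this change; pick the ones relevant to your change):

.. code-block:: bash

    # Run two test types sequentially, printing only condensed progress for each
    breeze tests --test-type Core --limit-progress-output
    breeze tests --test-type Providers --limit-progress-output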
diff --git a/airflow/api/auth/backend/basic_auth.py b/airflow/api/auth/backend/basic_auth.py index 397a722a98cf2..12f00b435fe11 100644 --- a/airflow/api/auth/backend/basic_auth.py +++ b/airflow/api/auth/backend/basic_auth.py @@ -18,10 +18,11 @@ from functools import wraps from typing import Any, Callable, Optional, Tuple, TypeVar, Union, cast -from flask import Response, current_app, request +from flask import Response, request from flask_appbuilder.const import AUTH_LDAP from flask_login import login_user +from airflow.utils.airflow_flask_app import get_airflow_app from airflow.www.fab_security.sqla.models import User CLIENT_AUTH: Optional[Union[Tuple[str, str], Any]] = None @@ -40,7 +41,7 @@ def auth_current_user() -> Optional[User]: if auth is None or not auth.username or not auth.password: return None - ab_security_manager = current_app.appbuilder.sm + ab_security_manager = get_airflow_app().appbuilder.sm user = None if ab_security_manager.auth_type == AUTH_LDAP: user = ab_security_manager.auth_user_ldap(auth.username, auth.password) diff --git a/airflow/api_connexion/endpoints/config_endpoint.py b/airflow/api_connexion/endpoints/config_endpoint.py index 9514621447609..bdd2b3a959547 100644 --- a/airflow/api_connexion/endpoints/config_endpoint.py +++ b/airflow/api_connexion/endpoints/config_endpoint.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +from http import HTTPStatus + from flask import Response, request from airflow.api_connexion import security @@ -72,7 +74,7 @@ def get_config() -> Response: } return_type = request.accept_mimetypes.best_match(serializer.keys()) if return_type not in serializer: - return Response(status=406) + return Response(status=HTTPStatus.NOT_ACCEPTABLE) elif conf.getboolean("webserver", "expose_config"): conf_dict = conf.as_dict(display_source=False, display_sensitive=True) config = _conf_dict_to_config(conf_dict) diff --git a/airflow/api_connexion/endpoints/connection_endpoint.py b/airflow/api_connexion/endpoints/connection_endpoint.py index f9be9c227e3f3..b196b3236b911 100644 --- a/airflow/api_connexion/endpoints/connection_endpoint.py +++ b/airflow/api_connexion/endpoints/connection_endpoint.py @@ -16,6 +16,7 @@ # under the License. import os +from http import HTTPStatus from connexion import NoContent from flask import request @@ -51,7 +52,7 @@ def delete_connection(*, connection_id: str, session: Session = NEW_SESSION) -> detail=f"The Connection with connection_id: `{connection_id}` was not found", ) session.delete(connection) - return NoContent, 204 + return NoContent, HTTPStatus.NO_CONTENT @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_CONNECTION)]) diff --git a/airflow/api_connexion/endpoints/dag_endpoint.py b/airflow/api_connexion/endpoints/dag_endpoint.py index e94707b127a69..7940a25c8f9fb 100644 --- a/airflow/api_connexion/endpoints/dag_endpoint.py +++ b/airflow/api_connexion/endpoints/dag_endpoint.py @@ -15,10 +15,11 @@ # specific language governing permissions and limitations # under the License. 
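The hunks above and below replace ``flask.current_app`` with ``airflow.utils.airflow_flask_app.get_airflow_app()``. That module is not part of this diff, but the idea is a thin, typed accessor so that the attributes Airflow attaches to its Flask app (``dag_bag``, ``appbuilder``, ``api_auth``) are visible to static type checkers instead of being looked up on an untyped proxy. A minimal sketch of what such a helper could look like, with the attribute names assumed from the call sites in this diff:

.. code-block:: python

    from typing import Any, List, cast

    from flask import Flask, current_app


    class AirflowApp(Flask):
        """Flask subclass used only to type the attributes Airflow sets on its app."""

        dag_bag: Any        # a DagBag in the real code base
        appbuilder: Any     # a flask_appbuilder AppBuilder
        api_auth: List[Any]


    def get_airflow_app() -> AirflowApp:
        # current_app is an untyped LocalProxy; casting gives call sites precise types.
        return cast(AirflowApp, current_app)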
+from http import HTTPStatus from typing import Collection, Optional from connexion import NoContent -from flask import current_app, g, request +from flask import g, request from marshmallow import ValidationError from sqlalchemy.orm import Session from sqlalchemy.sql.expression import or_ @@ -37,6 +38,7 @@ from airflow.exceptions import AirflowException, DagNotFound from airflow.models.dag import DagModel, DagTag from airflow.security import permissions +from airflow.utils.airflow_flask_app import get_airflow_app from airflow.utils.session import NEW_SESSION, provide_session @@ -55,7 +57,7 @@ def get_dag(*, dag_id: str, session: Session = NEW_SESSION) -> APIResponse: @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_DAG)]) def get_dag_details(*, dag_id: str) -> APIResponse: """Get details of DAG.""" - dag: DAG = current_app.dag_bag.get_dag(dag_id) + dag: DAG = get_airflow_app().dag_bag.get_dag(dag_id) if not dag: raise NotFound("DAG not found", detail=f"The DAG with dag_id: {dag_id} was not found") return dag_detail_schema.dump(dag) @@ -82,7 +84,7 @@ def get_dags( if dag_id_pattern: dags_query = dags_query.filter(DagModel.dag_id.ilike(f'%{dag_id_pattern}%')) - readable_dags = current_app.appbuilder.sm.get_accessible_dag_ids(g.user) + readable_dags = get_airflow_app().appbuilder.sm.get_accessible_dag_ids(g.user) dags_query = dags_query.filter(DagModel.dag_id.in_(readable_dags)) if tags: @@ -142,7 +144,7 @@ def patch_dags(limit, session, offset=0, only_active=True, tags=None, dag_id_pat if dag_id_pattern == '~': dag_id_pattern = '%' dags_query = dags_query.filter(DagModel.dag_id.ilike(f'%{dag_id_pattern}%')) - editable_dags = current_app.appbuilder.sm.get_editable_dag_ids(g.user) + editable_dags = get_airflow_app().appbuilder.sm.get_editable_dag_ids(g.user) dags_query = dags_query.filter(DagModel.dag_id.in_(editable_dags)) if tags: @@ -176,4 +178,4 @@ def delete_dag(dag_id: str, session: Session = NEW_SESSION) -> APIResponse: except AirflowException: raise AlreadyExists(detail=f"Task instances of dag with id: '{dag_id}' are still running") - return NoContent, 204 + return NoContent, HTTPStatus.NO_CONTENT diff --git a/airflow/api_connexion/endpoints/dag_run_endpoint.py b/airflow/api_connexion/endpoints/dag_run_endpoint.py index a83ca223b07ac..351b723c4a283 100644 --- a/airflow/api_connexion/endpoints/dag_run_endpoint.py +++ b/airflow/api_connexion/endpoints/dag_run_endpoint.py @@ -14,17 +14,19 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
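Replacing bare integers with ``http.HTTPStatus`` members, as the endpoint hunks around here do, is behaviour-preserving: ``HTTPStatus`` is an ``IntEnum``, so its members compare and serialize as the same integers while reading as named constants. A quick illustration of the pattern (the ``Response`` usage mirrors these hunks):

.. code-block:: python

    from http import HTTPStatus

    from flask import Response

    assert HTTPStatus.NO_CONTENT == 204
    assert int(HTTPStatus.NOT_ACCEPTABLE) == 406

    # Flask accepts the enum wherever it accepts an int status code.
    resp = Response(status=HTTPStatus.NO_CONTENT)
    assert resp.status_code == 204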
+from http import HTTPStatus from typing import List, Optional, Tuple import pendulum from connexion import NoContent -from flask import current_app, g, request +from flask import g from marshmallow import ValidationError from sqlalchemy import or_ from sqlalchemy.orm import Query, Session from airflow.api.common.mark_tasks import set_dag_run_state_to_failed, set_dag_run_state_to_success from airflow.api_connexion import security +from airflow.api_connexion.endpoints.request_dict import get_json_request_dict from airflow.api_connexion.exceptions import AlreadyExists, BadRequest, NotFound from airflow.api_connexion.parameters import apply_sorting, check_limit, format_datetime, format_parameters from airflow.api_connexion.schemas.dag_run_schema import ( @@ -37,6 +39,7 @@ from airflow.api_connexion.types import APIResponse from airflow.models import DagModel, DagRun from airflow.security import permissions +from airflow.utils.airflow_flask_app import get_airflow_app from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.state import DagRunState from airflow.utils.types import DagRunType @@ -53,7 +56,7 @@ def delete_dag_run(*, dag_id: str, dag_run_id: str, session: Session = NEW_SESSI """Delete a DAG Run""" if session.query(DagRun).filter(DagRun.dag_id == dag_id, DagRun.run_id == dag_run_id).delete() == 0: raise NotFound(detail=f"DAGRun with DAG ID: '{dag_id}' and DagRun ID: '{dag_run_id}' not found") - return NoContent, 204 + return NoContent, HTTPStatus.NO_CONTENT @security.requires_access( @@ -157,7 +160,7 @@ def get_dag_runs( # This endpoint allows specifying ~ as the dag_id to retrieve DAG Runs for all DAGs. if dag_id == "~": - appbuilder = current_app.appbuilder + appbuilder = get_airflow_app().appbuilder query = query.filter(DagRun.dag_id.in_(appbuilder.sm.get_readable_dag_ids(g.user))) else: query = query.filter(DagRun.dag_id == dag_id) @@ -189,13 +192,13 @@ def get_dag_runs( @provide_session def get_dag_runs_batch(*, session: Session = NEW_SESSION) -> APIResponse: """Get list of DAG Runs""" - body = request.get_json() + body = get_json_request_dict() try: data = dagruns_batch_form_schema.load(body) except ValidationError as err: raise BadRequest(detail=str(err.messages)) - appbuilder = current_app.appbuilder + appbuilder = get_airflow_app().appbuilder readable_dag_ids = appbuilder.sm.get_readable_dag_ids(g.user) query = session.query(DagRun) if data.get("dag_ids"): @@ -242,7 +245,7 @@ def post_dag_run(*, dag_id: str, session: Session = NEW_SESSION) -> APIResponse: detail=f"DAG with dag_id: '{dag_id}' has import errors", ) try: - post_body = dagrun_schema.load(request.json, session=session) + post_body = dagrun_schema.load(get_json_request_dict(), session=session) except ValidationError as err: raise BadRequest(detail=str(err)) @@ -258,7 +261,7 @@ def post_dag_run(*, dag_id: str, session: Session = NEW_SESSION) -> APIResponse: ) if not dagrun_instance: try: - dag = current_app.dag_bag.get_dag(dag_id) + dag = get_airflow_app().dag_bag.get_dag(dag_id) dag_run = dag.create_dagrun( run_type=DagRunType.MANUAL, run_id=run_id, @@ -267,7 +270,7 @@ def post_dag_run(*, dag_id: str, session: Session = NEW_SESSION) -> APIResponse: state=DagRunState.QUEUED, conf=post_body.get("conf"), external_trigger=True, - dag_hash=current_app.dag_bag.dags_hash.get(dag_id), + dag_hash=get_airflow_app().dag_bag.dags_hash.get(dag_id), ) return dagrun_schema.dump(dag_run) except ValueError as ve: @@ -300,12 +303,12 @@ def update_dag_run_state(*, dag_id: str, dag_run_id: str, session: 
Session = NEW error_message = f'Dag Run id {dag_run_id} not found in dag {dag_id}' raise NotFound(error_message) try: - post_body = set_dagrun_state_form_schema.load(request.json) + post_body = set_dagrun_state_form_schema.load(get_json_request_dict()) except ValidationError as err: raise BadRequest(detail=str(err)) state = post_body['state'] - dag = current_app.dag_bag.get_dag(dag_id) + dag = get_airflow_app().dag_bag.get_dag(dag_id) if state == DagRunState.SUCCESS: set_dag_run_state_to_success(dag=dag, run_id=dag_run.run_id, commit=True) else: diff --git a/airflow/api_connexion/endpoints/dag_source_endpoint.py b/airflow/api_connexion/endpoints/dag_source_endpoint.py index 74c3496a2c208..ad6209221e523 100644 --- a/airflow/api_connexion/endpoints/dag_source_endpoint.py +++ b/airflow/api_connexion/endpoints/dag_source_endpoint.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +from http import HTTPStatus + from flask import Response, current_app, request from itsdangerous import BadSignature, URLSafeSerializer @@ -42,4 +44,4 @@ def get_dag_source(*, file_token: str) -> Response: if return_type == 'application/json': content = dag_source_schema.dumps(dict(content=dag_source)) return Response(content, headers={'Content-Type': return_type}) - return Response("Not Allowed Accept Header", status=406) + return Response("Not Allowed Accept Header", status=HTTPStatus.NOT_ACCEPTABLE) diff --git a/airflow/api_connexion/endpoints/extra_link_endpoint.py b/airflow/api_connexion/endpoints/extra_link_endpoint.py index 3e9535603bda3..94b36928bfd0c 100644 --- a/airflow/api_connexion/endpoints/extra_link_endpoint.py +++ b/airflow/api_connexion/endpoints/extra_link_endpoint.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -from flask import current_app from sqlalchemy.orm.session import Session from airflow import DAG @@ -25,6 +24,7 @@ from airflow.exceptions import TaskNotFound from airflow.models.dagbag import DagBag from airflow.security import permissions +from airflow.utils.airflow_flask_app import get_airflow_app from airflow.utils.session import NEW_SESSION, provide_session @@ -46,7 +46,7 @@ def get_extra_links( """Get extra links for task instance""" from airflow.models.taskinstance import TaskInstance - dagbag: DagBag = current_app.dag_bag + dagbag: DagBag = get_airflow_app().dag_bag dag: DAG = dagbag.get_dag(dag_id) if not dag: raise NotFound("DAG not found", detail=f'DAG with ID = "{dag_id}" not found') diff --git a/airflow/api_connexion/endpoints/log_endpoint.py b/airflow/api_connexion/endpoints/log_endpoint.py index f1335fe527451..171cacb076e7c 100644 --- a/airflow/api_connexion/endpoints/log_endpoint.py +++ b/airflow/api_connexion/endpoints/log_endpoint.py @@ -14,10 +14,9 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
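Both ``dag_source_endpoint`` above (file tokens) and ``log_endpoint`` below (log continuation tokens) rely on ``itsdangerous.URLSafeSerializer`` signed with the webserver secret key: the payload stays readable but is tamper-evident. A small round-trip example of that pattern; the secret and payload here are made up.

.. code-block:: python

    from itsdangerous.exc import BadSignature
    from itsdangerous.url_safe import URLSafeSerializer

    serializer = URLSafeSerializer("not-the-real-secret-key")

    token = serializer.dumps({"end_of_log": False, "offset": 1024})
    assert serializer.loads(token) == {"end_of_log": False, "offset": 1024}

    try:
        serializer.loads(token + "tampered")
    except BadSignature:
        print("token was rejected")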
- from typing import Any, Optional -from flask import Response, current_app, request +from flask import Response, request from itsdangerous.exc import BadSignature from itsdangerous.url_safe import URLSafeSerializer from sqlalchemy.orm.session import Session @@ -29,6 +28,7 @@ from airflow.exceptions import TaskNotFound from airflow.models import TaskInstance from airflow.security import permissions +from airflow.utils.airflow_flask_app import get_airflow_app from airflow.utils.log.log_reader import TaskLogReader from airflow.utils.session import NEW_SESSION, provide_session @@ -52,7 +52,7 @@ def get_log( session: Session = NEW_SESSION, ) -> APIResponse: """Get logs for specific task instance""" - key = current_app.config["SECRET_KEY"] + key = get_airflow_app().config["SECRET_KEY"] if not token: metadata = {} else: @@ -87,7 +87,7 @@ def get_log( metadata['end_of_log'] = True raise NotFound(title="TaskInstance not found") - dag = current_app.dag_bag.get_dag(dag_id) + dag = get_airflow_app().dag_bag.get_dag(dag_id) if dag: try: ti.task = dag.get_task(ti.task_id) @@ -101,7 +101,8 @@ def get_log( if return_type == 'application/json' or return_type is None: # default logs, metadata = task_log_reader.read_log_chunks(ti, task_try_number, metadata) logs = logs[0] if task_try_number is not None else logs - token = URLSafeSerializer(key).dumps(metadata) + # we must have token here, so we can safely ignore it + token = URLSafeSerializer(key).dumps(metadata) # type: ignore[assignment] return logs_schema.dump(LogResponseObject(continuation_token=token, content=logs)) # text/plain. Stream logs = task_log_reader.read_log_stream(ti, task_try_number, metadata) diff --git a/airflow/api_connexion/endpoints/pool_endpoint.py b/airflow/api_connexion/endpoints/pool_endpoint.py index e9c8aee252bec..594afeb49bc1a 100644 --- a/airflow/api_connexion/endpoints/pool_endpoint.py +++ b/airflow/api_connexion/endpoints/pool_endpoint.py @@ -14,15 +14,18 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
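The ``pool_endpoint`` hunks below switch from ``request.json`` to the small ``get_json_request_dict()`` helper introduced later in this diff, and keep the existing required-field check based on set difference. A standalone sketch of that validation step, outside Flask, with illustrative field names:

.. code-block:: python

    from typing import Any, List, Mapping, Set


    def missing_required_fields(body: Mapping[str, Any], required: Set[str]) -> List[str]:
        """Return the sorted list of required keys absent from the request body."""
        return sorted(required - set(body))


    # Mirrors the post_pool check: both "name" and "slots" must be present.
    assert missing_required_fields({"name": "my_pool"}, {"name", "slots"}) == ["slots"]
    assert missing_required_fields({"name": "my_pool", "slots": 4}, {"name", "slots"}) == []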
+ +from http import HTTPStatus from typing import Optional -from flask import Response, request +from flask import Response from marshmallow import ValidationError from sqlalchemy import func from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session from airflow.api_connexion import security +from airflow.api_connexion.endpoints.request_dict import get_json_request_dict from airflow.api_connexion.exceptions import AlreadyExists, BadRequest, NotFound from airflow.api_connexion.parameters import apply_sorting, check_limit, format_parameters from airflow.api_connexion.schemas.pool_schema import PoolCollection, pool_collection_schema, pool_schema @@ -41,7 +44,7 @@ def delete_pool(*, pool_name: str, session: Session = NEW_SESSION) -> APIRespons affected_count = session.query(Pool).filter(Pool.pool == pool_name).delete() if affected_count == 0: raise NotFound(detail=f"Pool with name:'{pool_name}' not found") - return Response(status=204) + return Response(status=HTTPStatus.NO_CONTENT) @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_POOL)]) @@ -83,9 +86,10 @@ def patch_pool( session: Session = NEW_SESSION, ) -> APIResponse: """Update a pool""" + request_dict = get_json_request_dict() # Only slots can be modified in 'default_pool' try: - if pool_name == Pool.DEFAULT_POOL_NAME and request.json["name"] != Pool.DEFAULT_POOL_NAME: + if pool_name == Pool.DEFAULT_POOL_NAME and request_dict["name"] != Pool.DEFAULT_POOL_NAME: if update_mask and len(update_mask) == 1 and update_mask[0].strip() == "slots": pass else: @@ -98,7 +102,7 @@ def patch_pool( raise NotFound(detail=f"Pool with name:'{pool_name}' not found") try: - patch_body = pool_schema.load(request.json) + patch_body = pool_schema.load(request_dict) except ValidationError as err: raise BadRequest(detail=str(err.messages)) @@ -119,7 +123,7 @@ def patch_pool( else: required_fields = {"name", "slots"} - fields_diff = required_fields - set(request.json.keys()) + fields_diff = required_fields - set(get_json_request_dict().keys()) if fields_diff: raise BadRequest(detail=f"Missing required property(ies): {sorted(fields_diff)}") @@ -134,12 +138,12 @@ def patch_pool( def post_pool(*, session: Session = NEW_SESSION) -> APIResponse: """Create a pool""" required_fields = {"name", "slots"} # Pool would require both fields in the post request - fields_diff = required_fields - set(request.json.keys()) + fields_diff = required_fields - set(get_json_request_dict().keys()) if fields_diff: raise BadRequest(detail=f"Missing required property(ies): {sorted(fields_diff)}") try: - post_body = pool_schema.load(request.json, session=session) + post_body = pool_schema.load(get_json_request_dict(), session=session) except ValidationError as err: raise BadRequest(detail=str(err.messages)) diff --git a/airflow/api_connexion/endpoints/request_dict.py b/airflow/api_connexion/endpoints/request_dict.py new file mode 100644 index 0000000000000..4d7ad21250586 --- /dev/null +++ b/airflow/api_connexion/endpoints/request_dict.py @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Mapping, cast + + +def get_json_request_dict() -> Mapping[str, Any]: + from flask import request + + return cast(Mapping[str, Any], request.get_json()) diff --git a/airflow/api_connexion/endpoints/role_and_permission_endpoint.py b/airflow/api_connexion/endpoints/role_and_permission_endpoint.py index a25856e111b2c..1b25769af7737 100644 --- a/airflow/api_connexion/endpoints/role_and_permission_endpoint.py +++ b/airflow/api_connexion/endpoints/role_and_permission_endpoint.py @@ -15,10 +15,11 @@ # specific language governing permissions and limitations # under the License. +from http import HTTPStatus from typing import List, Optional, Tuple from connexion import NoContent -from flask import current_app, request +from flask import request from marshmallow import ValidationError from sqlalchemy import asc, desc, func @@ -34,6 +35,7 @@ ) from airflow.api_connexion.types import APIResponse, UpdateMask from airflow.security import permissions +from airflow.utils.airflow_flask_app import get_airflow_app from airflow.www.fab_security.sqla.models import Action, Role from airflow.www.security import AirflowSecurityManager @@ -54,7 +56,7 @@ def _check_action_and_resource(sm: AirflowSecurityManager, perms: List[Tuple[str @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_ROLE)]) def get_role(*, role_name: str) -> APIResponse: """Get role""" - ab_security_manager = current_app.appbuilder.sm + ab_security_manager = get_airflow_app().appbuilder.sm role = ab_security_manager.find_role(name=role_name) if not role: raise NotFound(title="Role not found", detail=f"Role with name {role_name!r} was not found") @@ -65,7 +67,7 @@ def get_role(*, role_name: str) -> APIResponse: @format_parameters({"limit": check_limit}) def get_roles(*, order_by: str = "name", limit: int, offset: Optional[int] = None) -> APIResponse: """Get roles""" - appbuilder = current_app.appbuilder + appbuilder = get_airflow_app().appbuilder session = appbuilder.get_session total_entries = session.query(func.count(Role.id)).scalar() direction = desc if order_by.startswith("-") else asc @@ -89,7 +91,7 @@ def get_roles(*, order_by: str = "name", limit: int, offset: Optional[int] = Non @format_parameters({'limit': check_limit}) def get_permissions(*, limit: int, offset: Optional[int] = None) -> APIResponse: """Get permissions""" - session = current_app.appbuilder.get_session + session = get_airflow_app().appbuilder.get_session total_entries = session.query(func.count(Action.id)).scalar() query = session.query(Action) actions = query.offset(offset).limit(limit).all() @@ -99,18 +101,18 @@ def get_permissions(*, limit: int, offset: Optional[int] = None) -> APIResponse: @security.requires_access([(permissions.ACTION_CAN_DELETE, permissions.RESOURCE_ROLE)]) def delete_role(*, role_name: str) -> APIResponse: """Delete a role""" - ab_security_manager = current_app.appbuilder.sm + ab_security_manager = get_airflow_app().appbuilder.sm role = ab_security_manager.find_role(name=role_name) if not role: raise NotFound(title="Role not found", detail=f"Role with name {role_name!r} was not found") 
ab_security_manager.delete_role(role_name=role_name) - return NoContent, 204 + return NoContent, HTTPStatus.NO_CONTENT @security.requires_access([(permissions.ACTION_CAN_EDIT, permissions.RESOURCE_ROLE)]) def patch_role(*, role_name: str, update_mask: UpdateMask = None) -> APIResponse: """Update a role""" - appbuilder = current_app.appbuilder + appbuilder = get_airflow_app().appbuilder security_manager = appbuilder.sm body = request.json try: @@ -144,7 +146,7 @@ def patch_role(*, role_name: str, update_mask: UpdateMask = None) -> APIResponse @security.requires_access([(permissions.ACTION_CAN_CREATE, permissions.RESOURCE_ROLE)]) def post_role() -> APIResponse: """Create a new role""" - appbuilder = current_app.appbuilder + appbuilder = get_airflow_app().appbuilder security_manager = appbuilder.sm body = request.json try: diff --git a/airflow/api_connexion/endpoints/task_endpoint.py b/airflow/api_connexion/endpoints/task_endpoint.py index 28c39b000c28d..74b6e7e9ee8ed 100644 --- a/airflow/api_connexion/endpoints/task_endpoint.py +++ b/airflow/api_connexion/endpoints/task_endpoint.py @@ -16,8 +16,6 @@ # under the License. from operator import attrgetter -from flask import current_app - from airflow import DAG from airflow.api_connexion import security from airflow.api_connexion.exceptions import BadRequest, NotFound @@ -25,6 +23,7 @@ from airflow.api_connexion.types import APIResponse from airflow.exceptions import TaskNotFound from airflow.security import permissions +from airflow.utils.airflow_flask_app import get_airflow_app @security.requires_access( @@ -35,7 +34,7 @@ ) def get_task(*, dag_id: str, task_id: str) -> APIResponse: """Get simplified representation of a task.""" - dag: DAG = current_app.dag_bag.get_dag(dag_id) + dag: DAG = get_airflow_app().dag_bag.get_dag(dag_id) if not dag: raise NotFound("DAG not found") @@ -54,7 +53,7 @@ def get_task(*, dag_id: str, task_id: str) -> APIResponse: ) def get_tasks(*, dag_id: str, order_by: str = "task_id") -> APIResponse: """Get tasks for DAG""" - dag: DAG = current_app.dag_bag.get_dag(dag_id) + dag: DAG = get_airflow_app().dag_bag.get_dag(dag_id) if not dag: raise NotFound("DAG not found") tasks = dag.tasks diff --git a/airflow/api_connexion/endpoints/task_instance_endpoint.py b/airflow/api_connexion/endpoints/task_instance_endpoint.py index c2416ab0d9d44..6cc3e784e62a3 100644 --- a/airflow/api_connexion/endpoints/task_instance_endpoint.py +++ b/airflow/api_connexion/endpoints/task_instance_endpoint.py @@ -16,7 +16,6 @@ # under the License. 
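Several endpoints in this diff (``get_roles`` and ``get_permissions`` above, ``get_users`` further down) follow the same pagination recipe: a ``func.count`` query for ``total_entries`` plus an ``offset``/``limit`` query for the page. A self-contained sketch of that recipe against an in-memory SQLite database; the model and data are illustrative only.

.. code-block:: python

    from sqlalchemy import Column, Integer, String, create_engine, func
    from sqlalchemy.orm import Session, declarative_base

    Base = declarative_base()


    class Role(Base):
        __tablename__ = "role"
        id = Column(Integer, primary_key=True)
        name = Column(String(50))


    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        session.add_all(Role(name=f"role_{i}") for i in range(10))
        session.commit()

        total_entries = session.query(func.count(Role.id)).scalar()
        page = session.query(Role).order_by(Role.name).offset(3).limit(2).all()

        print(total_entries)            # 10
        print([r.name for r in page])   # ['role_3', 'role_4']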
from typing import Any, Iterable, List, Optional, Tuple, TypeVar -from flask import current_app, request from marshmallow import ValidationError from sqlalchemy import and_, func, or_ from sqlalchemy.exc import MultipleResultsFound @@ -25,6 +24,7 @@ from sqlalchemy.sql import ClauseElement from airflow.api_connexion import security +from airflow.api_connexion.endpoints.request_dict import get_json_request_dict from airflow.api_connexion.exceptions import BadRequest, NotFound from airflow.api_connexion.parameters import format_datetime, format_parameters from airflow.api_connexion.schemas.task_instance_schema import ( @@ -42,6 +42,7 @@ from airflow.models.dagrun import DagRun as DR from airflow.models.taskinstance import TaskInstance as TI, clear_task_instances from airflow.security import permissions +from airflow.utils.airflow_flask_app import get_airflow_app from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.state import DagRunState, State @@ -188,7 +189,7 @@ def get_mapped_task_instances( # 0 can mean a mapped TI that expanded to an empty list, so it is not an automatic 404 if base_query.with_entities(func.count('*')).scalar() == 0: - dag = current_app.dag_bag.get_dag(dag_id) + dag = get_airflow_app().dag_bag.get_dag(dag_id) if not dag: error_message = f"DAG {dag_id} not found" raise NotFound(error_message) @@ -364,7 +365,7 @@ def get_task_instances( @provide_session def get_task_instances_batch(session: Session = NEW_SESSION) -> APIResponse: """Get list of task instances.""" - body = request.get_json() + body = get_json_request_dict() try: data = task_instance_batch_form.load(body) except ValidationError as err: @@ -423,20 +424,20 @@ def get_task_instances_batch(session: Session = NEW_SESSION) -> APIResponse: @provide_session def post_clear_task_instances(*, dag_id: str, session: Session = NEW_SESSION) -> APIResponse: """Clear task instances.""" - body = request.get_json() + body = get_json_request_dict() try: data = clear_task_instance_form.load(body) except ValidationError as err: raise BadRequest(detail=str(err.messages)) - dag = current_app.dag_bag.get_dag(dag_id) + dag = get_airflow_app().dag_bag.get_dag(dag_id) if not dag: error_message = f"Dag id {dag_id} not found" raise NotFound(error_message) reset_dag_runs = data.pop('reset_dag_runs') dry_run = data.pop('dry_run') # We always pass dry_run here, otherwise this would try to confirm on the terminal! 
- task_instances = dag.clear(dry_run=True, dag_bag=current_app.dag_bag, **data) + task_instances = dag.clear(dry_run=True, dag_bag=get_airflow_app().dag_bag, **data) if not dry_run: clear_task_instances( task_instances.all(), @@ -460,14 +461,14 @@ def post_clear_task_instances(*, dag_id: str, session: Session = NEW_SESSION) -> @provide_session def post_set_task_instances_state(*, dag_id: str, session: Session = NEW_SESSION) -> APIResponse: """Set a state of task instances.""" - body = request.get_json() + body = get_json_request_dict() try: data = set_task_instance_state_form.load(body) except ValidationError as err: raise BadRequest(detail=str(err.messages)) error_message = f"Dag ID {dag_id} not found" - dag = current_app.dag_bag.get_dag(dag_id) + dag = get_airflow_app().dag_bag.get_dag(dag_id) if not dag: raise NotFound(error_message) diff --git a/airflow/api_connexion/endpoints/user_endpoint.py b/airflow/api_connexion/endpoints/user_endpoint.py index 82375cebcaf16..3ab476e219cb9 100644 --- a/airflow/api_connexion/endpoints/user_endpoint.py +++ b/airflow/api_connexion/endpoints/user_endpoint.py @@ -14,10 +14,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from http import HTTPStatus from typing import List, Optional from connexion import NoContent -from flask import current_app, request +from flask import request from marshmallow import ValidationError from sqlalchemy import asc, desc, func from werkzeug.security import generate_password_hash @@ -33,13 +34,14 @@ ) from airflow.api_connexion.types import APIResponse, UpdateMask from airflow.security import permissions +from airflow.utils.airflow_flask_app import get_airflow_app from airflow.www.fab_security.sqla.models import Role, User @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_USER)]) def get_user(*, username: str) -> APIResponse: """Get a user""" - ab_security_manager = current_app.appbuilder.sm + ab_security_manager = get_airflow_app().appbuilder.sm user = ab_security_manager.find_user(username=username) if not user: raise NotFound(title="User not found", detail=f"The User with username `{username}` was not found") @@ -50,7 +52,7 @@ def get_user(*, username: str) -> APIResponse: @format_parameters({"limit": check_limit}) def get_users(*, limit: int, order_by: str = "id", offset: Optional[str] = None) -> APIResponse: """Get users""" - appbuilder = current_app.appbuilder + appbuilder = get_airflow_app().appbuilder session = appbuilder.get_session total_entries = session.query(func.count(User.id)).scalar() direction = desc if order_by.startswith("-") else asc @@ -86,7 +88,7 @@ def post_user() -> APIResponse: except ValidationError as e: raise BadRequest(detail=str(e.messages)) - security_manager = current_app.appbuilder.sm + security_manager = get_airflow_app().appbuilder.sm username = data["username"] email = data["email"] @@ -129,7 +131,7 @@ def patch_user(*, username: str, update_mask: UpdateMask = None) -> APIResponse: except ValidationError as e: raise BadRequest(detail=str(e.messages)) - security_manager = current_app.appbuilder.sm + security_manager = get_airflow_app().appbuilder.sm user = security_manager.find_user(username=username) if user is None: @@ -193,7 +195,7 @@ def patch_user(*, username: str, update_mask: UpdateMask = None) -> APIResponse: @security.requires_access([(permissions.ACTION_CAN_DELETE, permissions.RESOURCE_USER)]) def delete_user(*, username: str) -> APIResponse: """Delete a 
user""" - security_manager = current_app.appbuilder.sm + security_manager = get_airflow_app().appbuilder.sm user = security_manager.find_user(username=username) if user is None: @@ -204,4 +206,4 @@ def delete_user(*, username: str) -> APIResponse: security_manager.get_session.delete(user) security_manager.get_session.commit() - return NoContent, 204 + return NoContent, HTTPStatus.NO_CONTENT diff --git a/airflow/api_connexion/endpoints/variable_endpoint.py b/airflow/api_connexion/endpoints/variable_endpoint.py index 067d163401454..8f039b6fdbe69 100644 --- a/airflow/api_connexion/endpoints/variable_endpoint.py +++ b/airflow/api_connexion/endpoints/variable_endpoint.py @@ -14,14 +14,16 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from http import HTTPStatus from typing import Optional -from flask import Response, request +from flask import Response from marshmallow import ValidationError from sqlalchemy import func from sqlalchemy.orm import Session from airflow.api_connexion import security +from airflow.api_connexion.endpoints.request_dict import get_json_request_dict from airflow.api_connexion.exceptions import BadRequest, NotFound from airflow.api_connexion.parameters import apply_sorting, check_limit, format_parameters from airflow.api_connexion.schemas.variable_schema import variable_collection_schema, variable_schema @@ -36,7 +38,7 @@ def delete_variable(*, variable_key: str) -> Response: """Delete variable""" if Variable.delete(variable_key) == 0: raise NotFound("Variable not found") - return Response(status=204) + return Response(status=HTTPStatus.NO_CONTENT) @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_VARIABLE)]) @@ -78,7 +80,7 @@ def get_variables( def patch_variable(*, variable_key: str, update_mask: UpdateMask = None) -> Response: """Update a variable by key""" try: - data = variable_schema.load(request.json) + data = variable_schema.load(get_json_request_dict()) except ValidationError as err: raise BadRequest("Invalid Variable schema", detail=str(err.messages)) @@ -99,7 +101,7 @@ def patch_variable(*, variable_key: str, update_mask: UpdateMask = None) -> Resp def post_variables() -> Response: """Create a variable""" try: - data = variable_schema.load(request.json) + data = variable_schema.load(get_json_request_dict()) except ValidationError as err: raise BadRequest("Invalid Variable schema", detail=str(err.messages)) diff --git a/airflow/api_connexion/endpoints/xcom_endpoint.py b/airflow/api_connexion/endpoints/xcom_endpoint.py index 9cc6b6d79a933..62c7262f7ed2c 100644 --- a/airflow/api_connexion/endpoints/xcom_endpoint.py +++ b/airflow/api_connexion/endpoints/xcom_endpoint.py @@ -16,7 +16,7 @@ # under the License. 
from typing import Optional -from flask import current_app, g +from flask import g from sqlalchemy import and_ from sqlalchemy.orm import Session @@ -27,6 +27,7 @@ from airflow.api_connexion.types import APIResponse from airflow.models import DagRun as DR, XCom from airflow.security import permissions +from airflow.utils.airflow_flask_app import get_airflow_app from airflow.utils.session import NEW_SESSION, provide_session @@ -52,7 +53,7 @@ def get_xcom_entries( """Get all XCom values""" query = session.query(XCom) if dag_id == '~': - appbuilder = current_app.appbuilder + appbuilder = get_airflow_app().appbuilder readable_dag_ids = appbuilder.sm.get_readable_dag_ids(g.user) query = query.filter(XCom.dag_id.in_(readable_dag_ids)) query = query.join(DR, and_(XCom.dag_id == DR.dag_id, XCom.run_id == DR.run_id)) diff --git a/airflow/api_connexion/exceptions.py b/airflow/api_connexion/exceptions.py index 0c6c4fa0d3a8f..8fb7f2e78883b 100644 --- a/airflow/api_connexion/exceptions.py +++ b/airflow/api_connexion/exceptions.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from http import HTTPStatus from typing import Any, Dict, Optional import flask @@ -80,7 +81,7 @@ def __init__( **kwargs: Any, ) -> None: super().__init__( - status=404, + status=HTTPStatus.NOT_FOUND, type=EXCEPTIONS_LINK_MAP[404], title=title, detail=detail, @@ -100,7 +101,7 @@ def __init__( **kwargs: Any, ) -> None: super().__init__( - status=400, + status=HTTPStatus.BAD_REQUEST, type=EXCEPTIONS_LINK_MAP[400], title=title, detail=detail, @@ -120,7 +121,7 @@ def __init__( **kwargs: Any, ): super().__init__( - status=401, + status=HTTPStatus.UNAUTHORIZED, type=EXCEPTIONS_LINK_MAP[401], title=title, detail=detail, @@ -140,7 +141,7 @@ def __init__( **kwargs: Any, ) -> None: super().__init__( - status=403, + status=HTTPStatus.FORBIDDEN, type=EXCEPTIONS_LINK_MAP[403], title=title, detail=detail, @@ -160,7 +161,7 @@ def __init__( **kwargs: Any, ): super().__init__( - status=409, + status=HTTPStatus.CONFLICT, type=EXCEPTIONS_LINK_MAP[409], title=title, detail=detail, @@ -180,7 +181,7 @@ def __init__( **kwargs: Any, ) -> None: super().__init__( - status=500, + status=HTTPStatus.INTERNAL_SERVER_ERROR, type=EXCEPTIONS_LINK_MAP[500], title=title, detail=detail, diff --git a/airflow/api_connexion/schemas/dag_schema.py b/airflow/api_connexion/schemas/dag_schema.py index 2f369113290d9..6e7410dc4f2ef 100644 --- a/airflow/api_connexion/schemas/dag_schema.py +++ b/airflow/api_connexion/schemas/dag_schema.py @@ -83,7 +83,7 @@ def get_owners(obj: DagModel): @staticmethod def get_token(obj: DagModel): """Return file token""" - serializer = URLSafeSerializer(conf.get('webserver', 'secret_key')) + serializer = URLSafeSerializer(conf.get_mandatory_value('webserver', 'secret_key')) return serializer.dumps(obj.fileloc) diff --git a/airflow/api_connexion/schemas/task_instance_schema.py b/airflow/api_connexion/schemas/task_instance_schema.py index 37005256f6cdc..74824dbaf87c6 100644 --- a/airflow/api_connexion/schemas/task_instance_schema.py +++ b/airflow/api_connexion/schemas/task_instance_schema.py @@ -60,7 +60,7 @@ class Meta: pid = auto_field() executor_config = auto_field() sla_miss = fields.Nested(SlaMissSchema, dump_default=None) - rendered_fields = JsonObjectField(default={}) + rendered_fields = JsonObjectField(dump_default={}) def get_attribute(self, obj, attr, default): if attr == "sla_miss": diff --git a/airflow/api_connexion/security.py 
b/airflow/api_connexion/security.py index 3562c98eb4b35..6c84181f91bd3 100644 --- a/airflow/api_connexion/security.py +++ b/airflow/api_connexion/security.py @@ -18,16 +18,17 @@ from functools import wraps from typing import Callable, Optional, Sequence, Tuple, TypeVar, cast -from flask import Response, current_app +from flask import Response from airflow.api_connexion.exceptions import PermissionDenied, Unauthenticated +from airflow.utils.airflow_flask_app import get_airflow_app T = TypeVar("T", bound=Callable) def check_authentication() -> None: """Checks that the request has valid authorization information.""" - for auth in current_app.api_auth: + for auth in get_airflow_app().api_auth: response = auth.requires_authentication(Response)() if response.status_code == 200: return @@ -38,7 +39,7 @@ def check_authentication() -> None: def requires_access(permissions: Optional[Sequence[Tuple[str, str]]] = None) -> Callable[[T], T]: """Factory for decorator that checks current user's permissions against required permissions.""" - appbuilder = current_app.appbuilder + appbuilder = get_airflow_app().appbuilder appbuilder.sm.sync_resource_permissions(permissions) def requires_access_decorator(func: T): diff --git a/airflow/cli/commands/triggerer_command.py b/airflow/cli/commands/triggerer_command.py index 8bf419268059b..82e7fde129698 100644 --- a/airflow/cli/commands/triggerer_command.py +++ b/airflow/cli/commands/triggerer_command.py @@ -24,7 +24,7 @@ from airflow import settings from airflow.jobs.triggerer_job import TriggererJob from airflow.utils import cli as cli_utils -from airflow.utils.cli import setup_locations, setup_logging, sigquit_handler +from airflow.utils.cli import setup_locations, setup_logging, sigint_handler, sigquit_handler @cli_utils.action_cli @@ -50,19 +50,7 @@ def triggerer(args): job.run() else: - # There is a bug in CPython (fixed in March 2022 but not yet released) that - # makes async.io handle SIGTERM improperly by using async unsafe - # functions and hanging the triggerer receive SIGPIPE while handling - # SIGTERN/SIGINT and deadlocking itself. Until the bug is handled - # we should rather rely on standard handling of the signals rather than - # adding our own signal handlers. Seems that even if our signal handler - # just run exit(0) - it caused a race condition that led to the hanging. - # - # More details: - # * https://bugs.python.org/issue39622 - # * https://github.com/python/cpython/issues/83803 - # - # signal.signal(signal.SIGINT, sigint_handler) - # signal.signal(signal.SIGTERM, sigint_handler) + signal.signal(signal.SIGINT, sigint_handler) + signal.signal(signal.SIGTERM, sigint_handler) signal.signal(signal.SIGQUIT, sigquit_handler) job.run() diff --git a/airflow/config_templates/airflow_local_settings.py b/airflow/config_templates/airflow_local_settings.py index b2752c2be7c25..6684fd18e51a0 100644 --- a/airflow/config_templates/airflow_local_settings.py +++ b/airflow/config_templates/airflow_local_settings.py @@ -82,7 +82,6 @@ 'class': 'airflow.utils.log.file_task_handler.FileTaskHandler', 'formatter': 'airflow', 'base_log_folder': os.path.expanduser(BASE_LOG_FOLDER), - 'filename_template': FILENAME_TEMPLATE, 'filters': ['mask_secrets'], }, 'processor': { diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 3d89a3004177e..9da14d48aa64d 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -450,7 +450,7 @@ Check connection at the start of each connection pool checkout. 
Typically, this is a simple statement like "SELECT 1". More information here: - https://docs.sqlalchemy.org/en/13/core/pooling.html#disconnect-handling-pessimistic + https://docs.sqlalchemy.org/en/14/core/pooling.html#disconnect-handling-pessimistic version_added: 2.3.0 type: string example: ~ @@ -468,7 +468,7 @@ Import path for connect args in SqlAlchemy. Defaults to an empty dict. This is useful when you want to configure db engine args that SqlAlchemy won't parse in connection string. - See https://docs.sqlalchemy.org/en/13/core/engines.html#sqlalchemy.create_engine.params.connect_args + See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.connect_args version_added: 2.3.0 type: string example: ~ diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg index e177751e899e3..f58b688a6be3d 100644 --- a/airflow/config_templates/default_airflow.cfg +++ b/airflow/config_templates/default_airflow.cfg @@ -255,7 +255,7 @@ sql_alchemy_pool_recycle = 1800 # Check connection at the start of each connection pool checkout. # Typically, this is a simple statement like "SELECT 1". # More information here: -# https://docs.sqlalchemy.org/en/13/core/pooling.html#disconnect-handling-pessimistic +# https://docs.sqlalchemy.org/en/14/core/pooling.html#disconnect-handling-pessimistic sql_alchemy_pool_pre_ping = True # The schema to use for the metadata database. @@ -265,7 +265,7 @@ sql_alchemy_schema = # Import path for connect args in SqlAlchemy. Defaults to an empty dict. # This is useful when you want to configure db engine args that SqlAlchemy won't parse # in connection string. -# See https://docs.sqlalchemy.org/en/13/core/engines.html#sqlalchemy.create_engine.params.connect_args +# See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.connect_args # sql_alchemy_connect_args = # Whether to load the default connections that ship with Airflow. It's good to diff --git a/airflow/config_templates/default_test.cfg b/airflow/config_templates/default_test.cfg index 2f9b6fa264b13..83260d0d5250f 100644 --- a/airflow/config_templates/default_test.cfg +++ b/airflow/config_templates/default_test.cfg @@ -54,7 +54,6 @@ base_log_folder = {AIRFLOW_HOME}/logs logging_level = INFO celery_logging_level = WARN fab_logging_level = WARN -log_filename_template = {{{{ ti.dag_id }}}}/{{{{ ti.task_id }}}}/{{{{ ts }}}}/{{{{ try_number }}}}.log log_processor_filename_template = {{{{ filename }}}}.log dag_processor_manager_log_location = {AIRFLOW_HOME}/logs/dag_processor_manager/dag_processor_manager.log worker_log_server_port = 8793 diff --git a/airflow/configuration.py b/airflow/configuration.py index 729e780f74a6a..4f2caa186e2f7 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -256,7 +256,7 @@ class AirflowConfigParser(ConfigParser): }, 'elasticsearch': { 'log_id_template': ( - re.compile('^' + re.escape('{dag_id}-{task_id}-{run_id}-{try_number}') + '$'), + re.compile('^' + re.escape('{dag_id}-{task_id}-{execution_date}-{try_number}') + '$'), '{dag_id}-{task_id}-{run_id}-{map_index}-{try_number}', '3.0', ) @@ -361,21 +361,24 @@ def _upgrade_auth_backends(self): ) def _upgrade_postgres_metastore_conn(self): - """As of sqlalchemy 1.4, scheme `postgres+psycopg2` must be replaced with `postgresql`""" + """ + As of SQLAlchemy 1.4, schemes `postgres+psycopg2` and `postgres` + must be replaced with `postgresql`. 
+ """ section, key = 'database', 'sql_alchemy_conn' old_value = self.get(section, key) - bad_scheme = 'postgres+psycopg2' + bad_schemes = ['postgres+psycopg2', 'postgres'] good_scheme = 'postgresql' parsed = urlparse(old_value) - if parsed.scheme == bad_scheme: + if parsed.scheme in bad_schemes: warnings.warn( - f"Bad scheme in Airflow configuration core > sql_alchemy_conn: `{bad_scheme}`. " - "As of SqlAlchemy 1.4 (adopted in Airflow 2.3) this is no longer supported. You must " + f"Bad scheme in Airflow configuration core > sql_alchemy_conn: `{parsed.scheme}`. " + "As of SQLAlchemy 1.4 (adopted in Airflow 2.3) this is no longer supported. You must " f"change to `{good_scheme}` before the next Airflow release.", FutureWarning, ) self.upgraded_values[(section, key)] = old_value - new_value = re.sub('^' + re.escape(f"{bad_scheme}://"), f"{good_scheme}://", old_value) + new_value = re.sub('^' + re.escape(f"{parsed.scheme}://"), f"{good_scheme}://", old_value) self._update_env_var(section=section, name=key, new_value=new_value) # if the old value is set via env var, we need to wipe it diff --git a/airflow/example_dags/example_branch_datetime_operator.py b/airflow/example_dags/example_branch_datetime_operator.py index 3c86e40402aef..e707514c868a0 100644 --- a/airflow/example_dags/example_branch_datetime_operator.py +++ b/airflow/example_dags/example_branch_datetime_operator.py @@ -26,7 +26,7 @@ from airflow.operators.datetime import BranchDateTimeOperator from airflow.operators.empty import EmptyOperator -dag = DAG( +dag1 = DAG( dag_id="example_branch_datetime_operator", start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), catchup=False, @@ -35,8 +35,8 @@ ) # [START howto_branch_datetime_operator] -empty_task_1 = EmptyOperator(task_id='date_in_range', dag=dag) -empty_task_2 = EmptyOperator(task_id='date_outside_range', dag=dag) +empty_task_11 = EmptyOperator(task_id='date_in_range', dag=dag1) +empty_task_21 = EmptyOperator(task_id='date_outside_range', dag=dag1) cond1 = BranchDateTimeOperator( task_id='datetime_branch', @@ -44,15 +44,15 @@ follow_task_ids_if_false=['date_outside_range'], target_upper=pendulum.datetime(2020, 10, 10, 15, 0, 0), target_lower=pendulum.datetime(2020, 10, 10, 14, 0, 0), - dag=dag, + dag=dag1, ) # Run empty_task_1 if cond1 executes between 2020-10-10 14:00:00 and 2020-10-10 15:00:00 -cond1 >> [empty_task_1, empty_task_2] +cond1 >> [empty_task_11, empty_task_21] # [END howto_branch_datetime_operator] -dag = DAG( +dag2 = DAG( dag_id="example_branch_datetime_operator_2", start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), catchup=False, @@ -60,8 +60,8 @@ schedule_interval="@daily", ) # [START howto_branch_datetime_operator_next_day] -empty_task_1 = EmptyOperator(task_id='date_in_range', dag=dag) -empty_task_2 = EmptyOperator(task_id='date_outside_range', dag=dag) +empty_task_12 = EmptyOperator(task_id='date_in_range', dag=dag2) +empty_task_22 = EmptyOperator(task_id='date_outside_range', dag=dag2) cond2 = BranchDateTimeOperator( task_id='datetime_branch', @@ -69,10 +69,10 @@ follow_task_ids_if_false=['date_outside_range'], target_upper=pendulum.time(0, 0, 0), target_lower=pendulum.time(15, 0, 0), - dag=dag, + dag=dag2, ) # Since target_lower happens after target_upper, target_upper will be moved to the following day # Run empty_task_1 if cond2 executes between 15:00:00, and 00:00:00 of the following day -cond2 >> [empty_task_1, empty_task_2] +cond2 >> [empty_task_12, empty_task_22] # [END howto_branch_datetime_operator_next_day] diff --git 
a/airflow/example_dags/example_branch_day_of_week_operator.py b/airflow/example_dags/example_branch_day_of_week_operator.py index e4aaeb147593f..879824ab1c876 100644 --- a/airflow/example_dags/example_branch_day_of_week_operator.py +++ b/airflow/example_dags/example_branch_day_of_week_operator.py @@ -33,8 +33,8 @@ schedule_interval="@daily", ) as dag: # [START howto_operator_day_of_week_branch] - empty_task_1 = EmptyOperator(task_id='branch_true', dag=dag) - empty_task_2 = EmptyOperator(task_id='branch_false', dag=dag) + empty_task_1 = EmptyOperator(task_id='branch_true') + empty_task_2 = EmptyOperator(task_id='branch_false') branch = BranchDayOfWeekOperator( task_id="make_choice", diff --git a/airflow/example_dags/example_external_task_marker_dag.py b/airflow/example_dags/example_external_task_marker_dag.py index 0c4479a0d66f0..733b732756633 100644 --- a/airflow/example_dags/example_external_task_marker_dag.py +++ b/airflow/example_dags/example_external_task_marker_dag.py @@ -18,23 +18,25 @@ """ Example DAG demonstrating setting up inter-DAG dependencies using ExternalTaskSensor and -ExternalTaskMarker +ExternalTaskMarker. In this example, child_task1 in example_external_task_marker_child depends on parent_task in -example_external_task_marker_parent. When parent_task is cleared with "Recursive" selected, -the presence of ExternalTaskMarker tells Airflow to clear child_task1 and its -downstream tasks. +example_external_task_marker_parent. When parent_task is cleared with 'Recursive' selected, +the presence of ExternalTaskMarker tells Airflow to clear child_task1 and its downstream tasks. ExternalTaskSensor will keep poking for the status of remote ExternalTaskMarker task at a regular interval till one of the following will happen: -1. ExternalTaskMarker reaches the states mentioned in the allowed_states list - In this case, ExternalTaskSensor will exit with a success status code -2. ExternalTaskMarker reaches the states mentioned in the failed_states list - In this case, ExternalTaskSensor will raise an AirflowException and user need to handle this - with multiple downstream tasks -3. ExternalTaskSensor times out - In this case, ExternalTaskSensor will raise AirflowSkipException or AirflowSensorTimeout - exception + +ExternalTaskMarker reaches the states mentioned in the allowed_states list. +In this case, ExternalTaskSensor will exit with a success status code. + +ExternalTaskMarker reaches the states mentioned in the failed_states list. +In this case, ExternalTaskSensor will raise an AirflowException and the user needs to handle this +with multiple downstream tasks. + +ExternalTaskSensor times out. In this case, ExternalTaskSensor will raise AirflowSkipException +or AirflowSensorTimeout exception. + """ import pendulum diff --git a/airflow/exceptions.py b/airflow/exceptions.py index 95fa9e3276545..f1a8c1cb66473 100644 --- a/airflow/exceptions.py +++ b/airflow/exceptions.py @@ -21,6 +21,7 @@ """Exceptions used by Airflow""" import datetime import warnings +from http import HTTPStatus from typing import Any, Dict, List, NamedTuple, Optional, Sized @@ -31,19 +32,19 @@ class AirflowException(Exception): Each custom exception should be derived from this class.
""" - status_code = 500 + status_code = HTTPStatus.INTERNAL_SERVER_ERROR class AirflowBadRequest(AirflowException): """Raise when the application or server cannot handle the request.""" - status_code = 400 + status_code = HTTPStatus.BAD_REQUEST class AirflowNotFoundException(AirflowException): """Raise when the requested object/resource is not available in the system.""" - status_code = 404 + status_code = HTTPStatus.NOT_FOUND class AirflowConfigException(AirflowException): @@ -149,6 +150,10 @@ def __str__(self) -> str: return f"Ignoring DAG {self.dag_id} from {self.incoming} - also found in {self.existing}" +class AirflowDagInconsistent(AirflowException): + """Raise when a DAG has inconsistent attributes.""" + + class AirflowClusterPolicyViolation(AirflowException): """Raise when there is a violation of a Cluster Policy in DAG definition.""" @@ -185,6 +190,23 @@ class DuplicateTaskIdFound(AirflowException): """Raise when a Task with duplicate task_id is defined in the same DAG.""" +class TaskAlreadyInTaskGroup(AirflowException): + """Raise when a Task cannot be added to a TaskGroup since it already belongs to another TaskGroup.""" + + def __init__(self, task_id: str, existing_group_id: Optional[str], new_group_id: str) -> None: + super().__init__(task_id, new_group_id) + self.task_id = task_id + self.existing_group_id = existing_group_id + self.new_group_id = new_group_id + + def __str__(self) -> str: + if self.existing_group_id is None: + existing_group = "the DAG's root group" + else: + existing_group = f"group {self.existing_group_id!r}" + return f"cannot add {self.task_id!r} to {self.new_group_id!r} (already in {existing_group})" + + class SerializationError(AirflowException): """A problem occurred when trying to serialize a DAG.""" @@ -305,3 +327,7 @@ def __repr__(self) -> str: class TaskDeferralError(AirflowException): """Raised when a task failed during deferral for some reason.""" + + +class PodReconciliationError(AirflowException): + """Raised when an error is encountered while trying to merge pod configs.""" diff --git a/airflow/executors/kubernetes_executor.py b/airflow/executors/kubernetes_executor.py index c76cf58f418d4..e510da2b314d6 100644 --- a/airflow/executors/kubernetes_executor.py +++ b/airflow/executors/kubernetes_executor.py @@ -35,7 +35,7 @@ from kubernetes.client.rest import ApiException from urllib3.exceptions import ReadTimeoutError -from airflow.exceptions import AirflowException +from airflow.exceptions import AirflowException, PodReconciliationError from airflow.executors.base_executor import NOT_STARTED_MESSAGE, BaseExecutor, CommandType from airflow.kubernetes import pod_generator from airflow.kubernetes.kube_client import get_kube_client @@ -300,8 +300,9 @@ def run_next(self, next_job: KubernetesJobType) -> None: and store relevant info in the current_jobs map so we can track the job's status """ - self.log.info('Kubernetes job is %s', str(next_job).replace("\n", " ")) key, command, kube_executor_config, pod_template_file = next_job + self.log.info('Kubernetes job is %s', key) + dag_id, task_id, run_id, try_number, map_index = key if command[0:3] != ["airflow", "tasks", "run"]: @@ -617,6 +618,13 @@ def sync(self) -> None: task = self.task_queue.get_nowait() try: self.kube_scheduler.run_next(task) + except PodReconciliationError as e: + self.log.error( + "Pod reconciliation failed, likely due to kubernetes library upgrade. 
" + "Try clearing the task to re-run.", + exc_info=True, + ) + self.fail(task[0], e) except ApiException as e: # These codes indicate something is wrong with pod definition; otherwise we assume pod diff --git a/airflow/hooks/dbapi.py b/airflow/hooks/dbapi.py index da33bacca8447..0b9ce4377be23 100644 --- a/airflow/hooks/dbapi.py +++ b/airflow/hooks/dbapi.py @@ -128,6 +128,24 @@ def get_pandas_df(self, sql, parameters=None, **kwargs): with closing(self.get_conn()) as conn: return psql.read_sql(sql, con=conn, params=parameters, **kwargs) + def get_pandas_df_by_chunks(self, sql, parameters=None, *, chunksize, **kwargs): + """ + Executes the sql and returns a generator + + :param sql: the sql statement to be executed (str) or a list of + sql statements to execute + :param parameters: The parameters to render the SQL query with + :param chunksize: number of rows to include in each chunk + :param kwargs: (optional) passed into pandas.io.sql.read_sql method + """ + try: + from pandas.io import sql as psql + except ImportError: + raise Exception("pandas library not installed, run: pip install 'apache-airflow[pandas]'.") + + with closing(self.get_conn()) as conn: + yield from psql.read_sql(sql, con=conn, params=parameters, chunksize=chunksize, **kwargs) + def get_records(self, sql, parameters=None): """ Executes the sql and returns a set of records. diff --git a/airflow/jobs/scheduler_job.py b/airflow/jobs/scheduler_job.py index b4f2714e3a6ae..3440832275ef2 100644 --- a/airflow/jobs/scheduler_job.py +++ b/airflow/jobs/scheduler_job.py @@ -627,7 +627,6 @@ def _process_executor_events(self, session: Session = None) -> int: buffer_key = ti.key.with_try_number(try_number) state, info = event_buffer.pop(buffer_key) - # TODO: should we fail RUNNING as well, as we do in Backfills? if state == TaskInstanceState.QUEUED: ti.external_executor_id = info self.log.info("Setting external_id for %s to %s", ti, info) @@ -663,7 +662,20 @@ def _process_executor_events(self, session: Session = None) -> int: ti.pid, ) - if ti.try_number == buffer_key.try_number and ti.state == State.QUEUED: + # There are two scenarios why the same TI with the same try_number is queued + # after executor is finished with it: + # 1) the TI was killed externally and it had no time to mark itself failed + # - in this case we should mark it as failed here. + # 2) the TI has been requeued after getting deferred - in this case either our executor has it + # or the TI is queued by another job. Either ways we should not fail it. + + # All of this could also happen if the state is "running", + # but that is handled by the zombie detection. 
+ + ti_queued = ti.try_number == buffer_key.try_number and ti.state == TaskInstanceState.QUEUED + ti_requeued = ti.queued_by_job_id != self.id or self.executor.has_task(ti) + + if ti_queued and not ti_requeued: Stats.incr('scheduler.tasks.killed_externally') msg = ( "Executor reports task instance %s finished (%s) although the " diff --git a/airflow/kubernetes/pod_generator.py b/airflow/kubernetes/pod_generator.py index 52b45801ccabc..8a86919a65b75 100644 --- a/airflow/kubernetes/pod_generator.py +++ b/airflow/kubernetes/pod_generator.py @@ -34,7 +34,7 @@ from kubernetes.client import models as k8s from kubernetes.client.api_client import ApiClient -from airflow.exceptions import AirflowConfigException +from airflow.exceptions import AirflowConfigException, PodReconciliationError from airflow.kubernetes.pod_generator_deprecated import PodDefaults, PodGenerator as PodGeneratorDeprecated from airflow.utils import yaml from airflow.version import version as airflow_version @@ -389,7 +389,10 @@ def construct_pod( # Pod from the pod_template_File -> Pod from executor_config arg -> Pod from the K8s executor pod_list = [base_worker_pod, pod_override_object, dynamic_pod] - return reduce(PodGenerator.reconcile_pods, pod_list) + try: + return reduce(PodGenerator.reconcile_pods, pod_list) + except Exception as e: + raise PodReconciliationError from e @staticmethod def serialize_pod(pod: k8s.V1Pod) -> dict: diff --git a/airflow/migrations/versions/0080_2_0_2_change_default_pool_slots_to_1.py b/airflow/migrations/versions/0080_2_0_2_change_default_pool_slots_to_1.py index f5ae34c2977c9..ef819468efddc 100644 --- a/airflow/migrations/versions/0080_2_0_2_change_default_pool_slots_to_1.py +++ b/airflow/migrations/versions/0080_2_0_2_change_default_pool_slots_to_1.py @@ -37,6 +37,7 @@ def upgrade(): """Change default ``pool_slots`` to ``1`` and make pool_slots not nullable""" + op.execute("UPDATE task_instance SET pool_slots = 1 WHERE pool_slots IS NULL") with op.batch_alter_table("task_instance", schema=None) as batch_op: batch_op.alter_column("pool_slots", existing_type=sa.Integer, nullable=False, server_default='1') diff --git a/airflow/migrations/versions/0111_2_3_3_add_indexes_for_cascade_deletes.py b/airflow/migrations/versions/0111_2_3_3_add_indexes_for_cascade_deletes.py new file mode 100644 index 0000000000000..07cac9608cfac --- /dev/null +++ b/airflow/migrations/versions/0111_2_3_3_add_indexes_for_cascade_deletes.py @@ -0,0 +1,94 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Add indexes for CASCADE deletes on task_instance + +Some databases don't add indexes on the FK columns so we have to add them for performance on CASCADE deletes. 
+ +Revision ID: f5fcbda3e651 +Revises: 3c94c427fdf6 +Create Date: 2022-06-15 18:04:54.081789 + +""" + +from alembic import context, op + +# revision identifiers, used by Alembic. +revision = 'f5fcbda3e651' +down_revision = '3c94c427fdf6' +branch_labels = None +depends_on = None +airflow_version = '2.3.3' + + +def _mysql_tables_where_indexes_already_present(conn): + """ + If user downgraded and is upgrading again, we have to check for existing + indexes on mysql because we can't (and don't) drop them as part of the + downgrade. + """ + to_check = [ + ('xcom', 'idx_xcom_task_instance'), + ('task_reschedule', 'idx_task_reschedule_dag_run'), + ('task_fail', 'idx_task_fail_task_instance'), + ] + tables = set() + for tbl, idx in to_check: + if conn.execute(f"show indexes from {tbl} where Key_name = '{idx}'").first(): + tables.add(tbl) + return tables + + +def upgrade(): + """Apply Add indexes for CASCADE deletes""" + conn = op.get_bind() + tables_to_skip = set() + + # mysql requires indexes for FKs, so adding had the effect of renaming, and we cannot remove. + if conn.dialect.name == 'mysql' and not context.is_offline_mode(): + tables_to_skip.update(_mysql_tables_where_indexes_already_present(conn)) + + if 'task_fail' not in tables_to_skip: + with op.batch_alter_table('task_fail', schema=None) as batch_op: + batch_op.create_index('idx_task_fail_task_instance', ['dag_id', 'task_id', 'run_id', 'map_index']) + + if 'task_reschedule' not in tables_to_skip: + with op.batch_alter_table('task_reschedule', schema=None) as batch_op: + batch_op.create_index('idx_task_reschedule_dag_run', ['dag_id', 'run_id']) + + if 'xcom' not in tables_to_skip: + with op.batch_alter_table('xcom', schema=None) as batch_op: + batch_op.create_index('idx_xcom_task_instance', ['dag_id', 'task_id', 'run_id', 'map_index']) + + +def downgrade(): + """Unapply Add indexes for CASCADE deletes""" + conn = op.get_bind() + + # mysql requires indexes for FKs, so adding had the effect of renaming, and we cannot remove. 
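+    # Because MySQL needs an index backing every foreign key, the indexes added in
+    # upgrade() cannot simply be dropped again, so the whole downgrade is skipped
+    # for that dialect below.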
+ if conn.dialect.name == 'mysql': + return + + with op.batch_alter_table('xcom', schema=None) as batch_op: + batch_op.drop_index('idx_xcom_task_instance') + + with op.batch_alter_table('task_reschedule', schema=None) as batch_op: + batch_op.drop_index('idx_task_reschedule_dag_run') + + with op.batch_alter_table('task_fail', schema=None) as batch_op: + batch_op.drop_index('idx_task_fail_task_instance') diff --git a/airflow/models/abstractoperator.py b/airflow/models/abstractoperator.py index 8d2e06442a2e5..4d50288673be0 100644 --- a/airflow/models/abstractoperator.py +++ b/airflow/models/abstractoperator.py @@ -302,7 +302,6 @@ def get_extra_links(self, ti: "TaskInstance", link_name: str) -> Optional[str]: return link.get_link(self, ti_key=ti.key) else: return link.get_link(self, ti.dag_run.logical_date) # type: ignore[misc] - return None def render_template_fields( self, diff --git a/airflow/models/dag.py b/airflow/models/dag.py index 54f5b0667520f..823287dcb1856 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -63,7 +63,7 @@ from airflow import settings, utils from airflow.compat.functools import cached_property from airflow.configuration import conf -from airflow.exceptions import AirflowException, DuplicateTaskIdFound, TaskNotFound +from airflow.exceptions import AirflowDagInconsistent, AirflowException, DuplicateTaskIdFound, TaskNotFound from airflow.models.abstractoperator import AbstractOperator from airflow.models.base import ID_LEN, Base from airflow.models.dagbag import DagBag @@ -484,6 +484,47 @@ def __init__( self._task_group = TaskGroup.create_root(self) self.validate_schedule_and_params() + def _check_schedule_interval_matches_timetable(self) -> bool: + """Check ``schedule_interval`` and ``timetable`` match. + + This is done as a part of the DAG validation done before it's bagged, to + guard against the DAG's ``timetable`` (or ``schedule_interval``) from + being changed after it's created, e.g. + + .. code-block:: python + + dag1 = DAG("d1", timetable=MyTimetable()) + dag1.schedule_interval = "@once" + + dag2 = DAG("d2", schedule_interval="@once") + dag2.timetable = MyTimetable() + + Validation is done by creating a timetable and check its summary matches + ``schedule_interval``. The logic is not bullet-proof, especially if a + custom timetable does not provide a useful ``summary``. But this is the + best we can do. + """ + if self.schedule_interval == self.timetable.summary: + return True + try: + timetable = create_timetable(self.schedule_interval, self.timezone) + except ValueError: + return False + return timetable.summary == self.timetable.summary + + def validate(self): + """Validate the DAG has a coherent setup. + + This is called by the DAG bag before bagging the DAG. 
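+
+        It checks that ``schedule_interval`` and ``timetable`` agree (raising
+        ``AirflowDagInconsistent`` if they do not), then validates the DAG params
+        and the timetable itself.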
+ """ + if not self._check_schedule_interval_matches_timetable(): + raise AirflowDagInconsistent( + f"inconsistent schedule: timetable {self.timetable.summary!r} " + f"does not match schedule_interval {self.schedule_interval!r}", + ) + self.params.validate() + self.timetable.validate() + def __repr__(self): return f"" diff --git a/airflow/models/dagbag.py b/airflow/models/dagbag.py index 3673ce095ea16..929842fd0da4c 100644 --- a/airflow/models/dagbag.py +++ b/airflow/models/dagbag.py @@ -39,6 +39,7 @@ AirflowClusterPolicyViolation, AirflowDagCycleException, AirflowDagDuplicatedIdException, + AirflowDagInconsistent, AirflowTimetableInvalid, ParamValidationError, ) @@ -402,25 +403,25 @@ def _process_modules(self, filepath, mods, file_last_changed_on_disk): for (dag, mod) in top_level_dags: dag.fileloc = mod.__file__ try: - dag.timetable.validate() - # validate dag params - dag.params.validate() + dag.validate() self.bag_dag(dag=dag, root_dag=dag) - found_dags.append(dag) - found_dags += dag.subdags except AirflowTimetableInvalid as exception: self.log.exception("Failed to bag_dag: %s", dag.fileloc) self.import_errors[dag.fileloc] = f"Invalid timetable expression: {exception}" self.file_last_changed[dag.fileloc] = file_last_changed_on_disk except ( + AirflowClusterPolicyViolation, AirflowDagCycleException, AirflowDagDuplicatedIdException, - AirflowClusterPolicyViolation, + AirflowDagInconsistent, ParamValidationError, ) as exception: self.log.exception("Failed to bag_dag: %s", dag.fileloc) self.import_errors[dag.fileloc] = str(exception) self.file_last_changed[dag.fileloc] = file_last_changed_on_disk + else: + found_dags.append(dag) + found_dags += dag.subdags return found_dags def bag_dag(self, dag, root_dag): @@ -640,6 +641,8 @@ def _sync_perm_for_dag(self, dag, session: Session = None): from airflow.security.permissions import DAG_ACTIONS, resource_name_for_dag from airflow.www.fab_security.sqla.models import Action, Permission, Resource + root_dag_id = dag.parent_dag.dag_id if dag.parent_dag else dag.dag_id + def needs_perms(dag_id: str) -> bool: dag_resource_name = resource_name_for_dag(dag_id) for permission_name in DAG_ACTIONS: @@ -654,9 +657,9 @@ def needs_perms(dag_id: str) -> bool: return True return False - if dag.access_control or needs_perms(dag.dag_id): - self.log.debug("Syncing DAG permissions: %s to the DB", dag.dag_id) + if dag.access_control or needs_perms(root_dag_id): + self.log.debug("Syncing DAG permissions: %s to the DB", root_dag_id) from airflow.www.security import ApplessAirflowSecurityManager security_manager = ApplessAirflowSecurityManager(session=session) - security_manager.sync_perm_for_dag(dag.dag_id, dag.access_control) + security_manager.sync_perm_for_dag(root_dag_id, dag.access_control) diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index fdb566e467150..ad0dcdfebdbb3 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -23,6 +23,7 @@ from typing import ( TYPE_CHECKING, Any, + Callable, Dict, Generator, Iterable, @@ -30,6 +31,7 @@ NamedTuple, Optional, Sequence, + Set, Tuple, Union, cast, @@ -540,7 +542,7 @@ def update_state( ) leaf_task_ids = {t.task_id for t in dag.leaves} - leaf_tis = [ti for ti in tis if ti.task_id in leaf_task_ids] + leaf_tis = [ti for ti in tis if ti.task_id in leaf_task_ids if ti.state != TaskInstanceState.REMOVED] # if all roots finished and at least one failed, the run failed if not unfinished_tis and any(leaf_ti.state in State.failed_states for leaf_ti in leaf_tis): @@ -640,15 +642,9 @@ def 
task_instance_scheduling_decisions(self, session: Session = NEW_SESSION) -> tis = list(self.get_task_instances(session=session, state=State.task_states)) self.log.debug("number of tis tasks for %s: %s task(s)", self, len(tis)) dag = self.get_dag() - for ti in tis: - try: - ti.task = dag.get_task(ti.task_id) - except TaskNotFound: - self.log.warning( - "Failed to get task '%s' for dag '%s'. Marking it as removed.", ti, ti.dag_id - ) - ti.state = State.REMOVED - session.flush() + missing_indexes = self._find_missing_task_indexes(dag, tis, session=session) + if missing_indexes: + self.verify_integrity(missing_indexes=missing_indexes, session=session) unfinished_tis = [t for t in tis if t.state in State.unfinished] finished_tis = [t for t in tis if t.state in State.finished] @@ -809,22 +805,74 @@ def _emit_duration_stats_for_finished_state(self): Stats.timing(f'dagrun.duration.failed.{self.dag_id}', duration) @provide_session - def verify_integrity(self, session: Session = NEW_SESSION): + def verify_integrity( + self, + *, + missing_indexes: Optional[Dict["MappedOperator", Sequence[int]]] = None, + session: Session = NEW_SESSION, + ): """ Verifies the DagRun by checking for removed tasks or tasks that are not in the database yet. It will set state to removed or add the task if required. + :missing_indexes: A dictionary of task vs indexes that are missing. :param session: Sqlalchemy ORM Session """ from airflow.settings import task_instance_mutation_hook + # Set for the empty default in airflow.settings -- if it's not set this means it has been changed + hook_is_noop = getattr(task_instance_mutation_hook, 'is_noop', False) + dag = self.get_dag() + task_ids: Set[str] = set() + if missing_indexes: + tis = self.get_task_instances(session=session) + for ti in tis: + task_instance_mutation_hook(ti) + task_ids.add(ti.task_id) + else: + task_ids, missing_indexes = self._check_for_removed_or_restored_tasks( + dag, task_instance_mutation_hook, session=session + ) + + def task_filter(task: "Operator") -> bool: + return task.task_id not in task_ids and ( + self.is_backfill + or task.start_date <= self.execution_date + and (task.end_date is None or self.execution_date <= task.end_date) + ) + + created_counts: Dict[str, int] = defaultdict(int) + + # Get task creator function + task_creator = self._get_task_creator(created_counts, task_instance_mutation_hook, hook_is_noop) + + # Create the missing tasks, including mapped tasks + tasks = self._create_missing_tasks(dag, task_creator, task_filter, missing_indexes, session=session) + + self._create_task_instances(dag.dag_id, tasks, created_counts, hook_is_noop, session=session) + + def _check_for_removed_or_restored_tasks( + self, dag: "DAG", ti_mutation_hook, *, session: Session + ) -> Tuple[Set[str], Dict["MappedOperator", Sequence[int]]]: + """ + Check for removed tasks/restored/missing tasks. 
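+
+        "Missing" map indexes are indexes that a mapped task is expected to have, given
+        its current expansion length, but for which no TaskInstance row exists yet; they
+        are returned so the caller can create the corresponding task instances.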
+ + :param dag: DAG object corresponding to the dagrun + :param ti_mutation_hook: task_instance_mutation_hook function + :param session: Sqlalchemy ORM Session + + :return: List of task_ids in the dagrun and missing task indexes + + """ tis = self.get_task_instances(session=session) # check for removed or restored tasks task_ids = set() + existing_indexes: Dict["MappedOperator", List[int]] = defaultdict(list) + expected_indexes: Dict["MappedOperator", Sequence[int]] = defaultdict(list) for ti in tis: - task_instance_mutation_hook(ti) + ti_mutation_hook(ti) task_ids.add(ti.task_id) task = None try: @@ -863,7 +911,8 @@ def verify_integrity(self, session: Session = NEW_SESSION): else: self.log.info("Restoring mapped task '%s'", ti) Stats.incr(f"task_restored_to_dag.{dag.dag_id}", 1, 1) - ti.state = State.NONE + existing_indexes[task].append(ti.map_index) + expected_indexes[task] = range(num_mapped_tis) else: # What if it is _now_ dynamically mapped, but wasn't before? total_length = task.run_time_mapped_ti_count(self.run_id, session=session) @@ -884,20 +933,30 @@ def verify_integrity(self, session: Session = NEW_SESSION): total_length, ) ti.state = State.REMOVED - ... - - def task_filter(task: "Operator") -> bool: - return task.task_id not in task_ids and ( - self.is_backfill - or task.start_date <= self.execution_date - and (task.end_date is None or self.execution_date <= task.end_date) - ) + else: + self.log.info("Restoring mapped task '%s'", ti) + Stats.incr(f"task_restored_to_dag.{dag.dag_id}", 1, 1) + existing_indexes[task].append(ti.map_index) + expected_indexes[task] = range(total_length) + # Check if we have some missing indexes to create ti for + missing_indexes: Dict["MappedOperator", Sequence[int]] = defaultdict(list) + for k, v in existing_indexes.items(): + missing_indexes.update({k: list(set(expected_indexes[k]).difference(v))}) + return task_ids, missing_indexes + + def _get_task_creator( + self, created_counts: Dict[str, int], ti_mutation_hook: Callable, hook_is_noop: bool + ) -> Callable: + """ + Get the task creator function. - created_counts: Dict[str, int] = defaultdict(int) + This function also updates the created_counts dictionary with the number of tasks created. 
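+        When the mutation hook is a no-op the creator yields plain mappings suitable
+        for ``bulk_insert_mappings``; otherwise it yields ``TaskInstance`` objects so
+        the hook can mutate each one.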
- # Set for the empty default in airflow.settings -- if it's not set this means it has been changed - hook_is_noop = getattr(task_instance_mutation_hook, 'is_noop', False) + :param created_counts: Dictionary of task_type -> count of created TIs + :param ti_mutation_hook: task_instance_mutation_hook function + :param hook_is_noop: Whether the task_instance_mutation_hook is a noop + """ if hook_is_noop: def create_ti_mapping(task: "Operator", indexes: Tuple[int, ...]) -> Generator: @@ -912,14 +971,34 @@ def create_ti_mapping(task: "Operator", indexes: Tuple[int, ...]) -> Generator: def create_ti(task: "Operator", indexes: Tuple[int, ...]) -> Generator: for map_index in indexes: ti = TI(task, run_id=self.run_id, map_index=map_index) - task_instance_mutation_hook(ti) + ti_mutation_hook(ti) created_counts[ti.operator] += 1 yield ti creator = create_ti + return creator + + def _create_missing_tasks( + self, + dag: "DAG", + task_creator: Callable, + task_filter: Callable, + missing_indexes: Optional[Dict["MappedOperator", Sequence[int]]], + *, + session: Session, + ) -> Iterable["Operator"]: + """ + Create missing tasks -- and expand any MappedOperator that _only_ have literals as input + + :param dag: DAG object corresponding to the dagrun + :param task_creator: a function that creates tasks + :param task_filter: a function that filters tasks to create + :param session: the session to use + """ - # Create missing tasks -- and expand any MappedOperator that _only_ have literals as input - def expand_mapped_literals(task: "Operator") -> Tuple["Operator", Sequence[int]]: + def expand_mapped_literals( + task: "Operator", sequence: Union[Sequence[int], None] = None + ) -> Tuple["Operator", Sequence[int]]: if not task.is_mapped: return (task, (-1,)) task = cast("MappedOperator", task) @@ -928,11 +1007,40 @@ def expand_mapped_literals(task: "Operator") -> Tuple["Operator", Sequence[int]] ) if not count: return (task, (-1,)) + if sequence: + return (task, sequence) return (task, range(count)) tasks_and_map_idxs = map(expand_mapped_literals, filter(task_filter, dag.task_dict.values())) - tasks = itertools.chain.from_iterable(itertools.starmap(creator, tasks_and_map_idxs)) + tasks = itertools.chain.from_iterable(itertools.starmap(task_creator, tasks_and_map_idxs)) + if missing_indexes: + # If there are missing indexes, override the tasks to create + new_tasks_and_map_idxs = itertools.starmap( + expand_mapped_literals, [(k, v) for k, v in missing_indexes.items() if len(v) > 0] + ) + tasks = itertools.chain.from_iterable(itertools.starmap(task_creator, new_tasks_and_map_idxs)) + return tasks + + def _create_task_instances( + self, + dag_id: str, + tasks: Iterable["Operator"], + created_counts: Dict[str, int], + hook_is_noop: bool, + *, + session: Session, + ) -> None: + """ + Create the necessary task instances from the given tasks. 
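+
+        If an ``IntegrityError`` is raised while inserting (for example because the TIs
+        already exist), it is logged and the session is rolled back.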
+ + :param dag_id: DAG ID associated with the dagrun + :param tasks: the tasks to create the task instances from + :param created_counts: a dictionary of number of tasks -> total ti created by the task creator + :param hook_is_noop: whether the task_instance_mutation_hook is noop + :param session: the session to use + + """ try: if hook_is_noop: session.bulk_insert_mappings(TI, tasks) @@ -945,7 +1053,7 @@ def expand_mapped_literals(task: "Operator") -> Tuple["Operator", Sequence[int]] except IntegrityError: self.log.info( 'Hit IntegrityError while creating the TIs for %s- %s', - dag.dag_id, + dag_id, self.run_id, exc_info=True, ) @@ -953,6 +1061,42 @@ def expand_mapped_literals(task: "Operator") -> Tuple["Operator", Sequence[int]] # TODO[HA]: We probably need to savepoint this so we can keep the transaction alive. session.rollback() + def _find_missing_task_indexes(self, dag, tis, *, session) -> Dict["MappedOperator", Sequence[int]]: + """ + Here we check if the length of the mapped task instances changed + at runtime. If so, we find the missing indexes. + + This function also marks task instances with missing tasks as REMOVED. + + :param dag: DAG object corresponding to the dagrun + :param tis: task instances to check + :param session: the session to use + """ + existing_indexes: Dict["MappedOperator", list] = defaultdict(list) + new_indexes: Dict["MappedOperator", Sequence[int]] = defaultdict(list) + for ti in tis: + try: + task = ti.task = dag.get_task(ti.task_id) + except TaskNotFound: + self.log.error("Failed to get task '%s' for dag '%s'. Marking it as removed.", ti, ti.dag_id) + + ti.state = State.REMOVED + session.flush() + continue + if not task.is_mapped: + continue + # skip unexpanded tasks and also tasks that expands with literal arguments + if ti.map_index < 0 or task.parse_time_mapped_ti_count: + continue + existing_indexes[task].append(ti.map_index) + task.run_time_mapped_ti_count.cache_clear() + new_length = task.run_time_mapped_ti_count(self.run_id, session=session) or 0 + new_indexes[task] = range(new_length) + missing_indexes: Dict["MappedOperator", Sequence[int]] = defaultdict(list) + for k, v in existing_indexes.items(): + missing_indexes.update({k: list(set(new_indexes[k]).difference(v))}) + return missing_indexes + @staticmethod def get_run(session: Session, dag_id: str, execution_date: datetime) -> Optional['DagRun']: """ @@ -1065,14 +1209,23 @@ def schedule_tis(self, schedulable_tis: Iterable[TI], session: Session = NEW_SES return count @provide_session - def get_log_filename_template(self, *, session: Session = NEW_SESSION) -> str: + def get_log_template(self, *, session: Session = NEW_SESSION) -> LogTemplate: if self.log_template_id is None: # DagRun created before LogTemplate introduction. - template = session.query(LogTemplate.filename).order_by(LogTemplate.id).limit(1).scalar() + template = session.query(LogTemplate).order_by(LogTemplate.id).first() else: - template = session.query(LogTemplate.filename).filter_by(id=self.log_template_id).scalar() + template = session.query(LogTemplate).get(self.log_template_id) if template is None: raise AirflowException( f"No log_template entry found for ID {self.log_template_id!r}. " f"Please make sure you set up the metadatabase correctly." ) return template + + @provide_session + def get_log_filename_template(self, *, session: Session = NEW_SESSION) -> str: + warnings.warn( + "This method is deprecated. 
Please use get_log_template instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.get_log_template(session=session).filename diff --git a/airflow/models/taskfail.py b/airflow/models/taskfail.py index f7de99c308cac..b5f23d8ec56d1 100644 --- a/airflow/models/taskfail.py +++ b/airflow/models/taskfail.py @@ -17,7 +17,7 @@ # under the License. """Taskfail tracks the failed run durations of each task instance""" -from sqlalchemy import Column, ForeignKeyConstraint, Integer +from sqlalchemy import Column, ForeignKeyConstraint, Index, Integer from sqlalchemy.orm import relationship from airflow.models.base import Base, StringID @@ -39,6 +39,7 @@ class TaskFail(Base): duration = Column(Integer) __table_args__ = ( + Index("idx_task_fail_task_instance", dag_id, task_id, run_id, map_index), ForeignKeyConstraint( [dag_id, task_id, run_id, map_index], [ diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index 5cd582ce3e34d..fe3387ecf0025 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -428,6 +428,20 @@ def key(self) -> "TaskInstanceKey": return self +def _executor_config_comparator(x, y): + """ + The TaskInstance.executor_config attribute is a pickled object that may contain + kubernetes objects. If the installed library version has changed since the + object was originally pickled, due to the underlying ``__eq__`` method on these + objects (which converts them to JSON), we may encounter attribute errors. In this + case we should replace the stored object. + """ + try: + return x == y + except AttributeError: + return False + + class TaskInstance(Base, LoggingMixin): """ Task instances store the state of a task instance. This table is the @@ -470,7 +484,7 @@ class TaskInstance(Base, LoggingMixin): queued_dttm = Column(UtcDateTime) queued_by_job_id = Column(Integer) pid = Column(Integer) - executor_config = Column(PickleType(pickler=dill)) + executor_config = Column(PickleType(pickler=dill, comparator=_executor_config_comparator)) external_executor_id = Column(String(ID_LEN, **COLLATION_ARGS)) @@ -799,7 +813,13 @@ def log_url(self): """Log URL for TaskInstance""" iso = quote(self.execution_date.isoformat()) base_url = conf.get('webserver', 'BASE_URL') - return base_url + f"/log?execution_date={iso}&task_id={self.task_id}&dag_id={self.dag_id}" + return ( + f"{base_url}/log" + f"?execution_date={iso}" + f"&task_id={self.task_id}" + f"&dag_id={self.dag_id}" + f"&map_index={self.map_index}" + ) @property def mark_success_url(self): diff --git a/airflow/models/taskreschedule.py b/airflow/models/taskreschedule.py index 518f1e77ff65f..132554d8d1760 100644 --- a/airflow/models/taskreschedule.py +++ b/airflow/models/taskreschedule.py @@ -61,6 +61,7 @@ class TaskReschedule(Base): name="task_reschedule_ti_fkey", ondelete="CASCADE", ), + Index('idx_task_reschedule_dag_run', dag_id, run_id), ForeignKeyConstraint( [dag_id, run_id], ['dag_run.dag_id', 'dag_run.run_id'], diff --git a/airflow/models/xcom.py b/airflow/models/xcom.py index d67160d1fa901..aad720bd8b421 100644 --- a/airflow/models/xcom.py +++ b/airflow/models/xcom.py @@ -72,6 +72,7 @@ class BaseXCom(Base, LoggingMixin): # but it goes over MySQL's index length limit. So we instead index 'key' # separately, and enforce uniqueness with DagRun.id instead. 
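+        # The composite index added below (idx_xcom_task_instance) mirrors the
+        # task_instance FK columns so that CASCADE deletes coming from task_instance
+        # do not require a full scan of this table.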
Index("idx_xcom_key", key), + Index("idx_xcom_task_instance", dag_id, task_id, run_id, map_index), ForeignKeyConstraint( [dag_id, task_id, run_id, map_index], [ diff --git a/airflow/operators/datetime.py b/airflow/operators/datetime.py index c37a4f9d50c11..c5a423d563868 100644 --- a/airflow/operators/datetime.py +++ b/airflow/operators/datetime.py @@ -16,6 +16,7 @@ # under the License. import datetime +import warnings from typing import Iterable, Union from airflow.exceptions import AirflowException @@ -39,7 +40,7 @@ class BranchDateTimeOperator(BaseBranchOperator): ``datetime.datetime.now()`` falls below target_lower or above ``target_upper``. :param target_lower: target lower bound. :param target_upper: target upper bound. - :param use_task_execution_date: If ``True``, uses task's execution day to compare with targets. + :param use_task_logical_date: If ``True``, uses task's logical date to compare with targets. Execution date is useful for backfilling. If ``False``, uses system's date. """ @@ -50,6 +51,7 @@ def __init__( follow_task_ids_if_false: Union[str, Iterable[str]], target_lower: Union[datetime.datetime, datetime.time, None], target_upper: Union[datetime.datetime, datetime.time, None], + use_task_logical_date: bool = False, use_task_execution_date: bool = False, **kwargs, ) -> None: @@ -64,10 +66,17 @@ def __init__( self.target_upper = target_upper self.follow_task_ids_if_true = follow_task_ids_if_true self.follow_task_ids_if_false = follow_task_ids_if_false - self.use_task_execution_date = use_task_execution_date + self.use_task_logical_date = use_task_logical_date + if use_task_execution_date: + self.use_task_logical_date = use_task_execution_date + warnings.warn( + "Parameter ``use_task_execution_date`` is deprecated. Use ``use_task_logical_date``.", + DeprecationWarning, + stacklevel=2, + ) def choose_branch(self, context: Context) -> Union[str, Iterable[str]]: - if self.use_task_execution_date is True: + if self.use_task_logical_date: now = timezone.make_naive(context["logical_date"], self.dag.timezone) else: now = timezone.make_naive(timezone.utcnow(), self.dag.timezone) diff --git a/airflow/operators/trigger_dagrun.py b/airflow/operators/trigger_dagrun.py index 0689f14c56261..4578fd2df818b 100644 --- a/airflow/operators/trigger_dagrun.py +++ b/airflow/operators/trigger_dagrun.py @@ -23,7 +23,10 @@ from airflow.api.common.trigger_dag import trigger_dag from airflow.exceptions import AirflowException, DagNotFound, DagRunAlreadyExists -from airflow.models import BaseOperator, BaseOperatorLink, DagBag, DagModel, DagRun +from airflow.models.baseoperator import BaseOperator, BaseOperatorLink +from airflow.models.dag import DagModel +from airflow.models.dagbag import DagBag +from airflow.models.dagrun import DagRun from airflow.models.xcom import XCom from airflow.utils import timezone from airflow.utils.context import Context diff --git a/airflow/operators/weekday.py b/airflow/operators/weekday.py index fb35079fe0d3e..b23d57e9fb1d4 100644 --- a/airflow/operators/weekday.py +++ b/airflow/operators/weekday.py @@ -15,7 +15,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
- +import warnings from typing import Iterable, Union from airflow.operators.branch import BaseBranchOperator @@ -41,7 +41,7 @@ class BranchDayOfWeekOperator(BaseBranchOperator): * ``{WeekDay.TUESDAY}`` * ``{WeekDay.SATURDAY, WeekDay.SUNDAY}`` - :param use_task_execution_day: If ``True``, uses task's execution day to compare + :param use_task_logical_date: If ``True``, uses task's logical date to compare with is_today. Execution Date is Useful for backfilling. If ``False``, uses system's day of the week. """ @@ -52,6 +52,7 @@ def __init__( follow_task_ids_if_true: Union[str, Iterable[str]], follow_task_ids_if_false: Union[str, Iterable[str]], week_day: Union[str, Iterable[str]], + use_task_logical_date: bool = False, use_task_execution_day: bool = False, **kwargs, ) -> None: @@ -59,11 +60,18 @@ def __init__( self.follow_task_ids_if_true = follow_task_ids_if_true self.follow_task_ids_if_false = follow_task_ids_if_false self.week_day = week_day - self.use_task_execution_day = use_task_execution_day + self.use_task_logical_date = use_task_logical_date + if use_task_execution_day: + self.use_task_logical_date = use_task_execution_day + warnings.warn( + "Parameter ``use_task_execution_day`` is deprecated. Use ``use_task_logical_date``.", + DeprecationWarning, + stacklevel=2, + ) self._week_day_num = WeekDay.validate_week_day(week_day) def choose_branch(self, context: Context) -> Union[str, Iterable[str]]: - if self.use_task_execution_day: + if self.use_task_logical_date: now = context["logical_date"] else: now = timezone.make_naive(timezone.utcnow(), self.dag.timezone) diff --git a/airflow/providers/alibaba/cloud/log/oss_task_handler.py b/airflow/providers/alibaba/cloud/log/oss_task_handler.py index d26bfbfd048dc..ec61972ffcd43 100644 --- a/airflow/providers/alibaba/cloud/log/oss_task_handler.py +++ b/airflow/providers/alibaba/cloud/log/oss_task_handler.py @@ -36,7 +36,7 @@ class OSSTaskHandler(FileTaskHandler, LoggingMixin): uploads to and reads from OSS remote storage. """ - def __init__(self, base_log_folder, oss_log_folder, filename_template): + def __init__(self, base_log_folder, oss_log_folder, filename_template=None): self.log.info("Using oss_task_handler for remote logging...") super().__init__(base_log_folder, filename_template) (self.bucket_name, self.base_folder) = OSSHook.parse_oss_url(oss_log_folder) diff --git a/airflow/providers/amazon/aws/example_dags/example_google_api_youtube_to_s3.py b/airflow/providers/amazon/aws/example_dags/example_google_api_youtube_to_s3.py index 241a551320efd..2bbe4fac58e2c 100644 --- a/airflow/providers/amazon/aws/example_dags/example_google_api_youtube_to_s3.py +++ b/airflow/providers/amazon/aws/example_dags/example_google_api_youtube_to_s3.py @@ -26,7 +26,8 @@ Further information: YOUTUBE_VIDEO_PUBLISHED_AFTER and YOUTUBE_VIDEO_PUBLISHED_BEFORE needs to be formatted -"YYYY-MM-DDThh:mm:ss.sZ". See https://developers.google.com/youtube/v3/docs/search/list for more information. +``YYYY-MM-DDThh:mm:ss.sZ``. +See https://developers.google.com/youtube/v3/docs/search/list for more information. YOUTUBE_VIDEO_PARTS depends on the fields you pass via YOUTUBE_VIDEO_FIELDS. See https://developers.google.com/youtube/v3/docs/videos/list#parameters for more information. YOUTUBE_CONN_ID is optional for public videos. 
It does only need to authenticate when there are private videos diff --git a/airflow/providers/amazon/aws/example_dags/example_s3.py b/airflow/providers/amazon/aws/example_dags/example_s3.py index ecd9d374cf688..83a6825d72fe0 100644 --- a/airflow/providers/amazon/aws/example_dags/example_s3.py +++ b/airflow/providers/amazon/aws/example_dags/example_s3.py @@ -62,12 +62,9 @@ # [START howto_sensor_s3_key_function_definition] def check_fn(files: List) -> bool: """ - Example of custom check: check if all files are bigger than 1kB + Example of custom check: check if all files are bigger than ``1kB`` :param files: List of S3 object attributes. - Format: [{ - 'Size': int - }] :return: true if the criteria is met :rtype: bool """ diff --git a/airflow/providers/amazon/aws/hooks/batch_waiters.py b/airflow/providers/amazon/aws/hooks/batch_waiters.py index 59ba0e431f25e..3d1cd9d4b468d 100644 --- a/airflow/providers/amazon/aws/hooks/batch_waiters.py +++ b/airflow/providers/amazon/aws/hooks/batch_waiters.py @@ -71,16 +71,12 @@ class BatchWaitersHook(BatchClientHook): # and the details of the config on that waiter can be further modified without any # accidental impact on the generation of new waiters from the defined waiter_model, e.g. waiters.get_waiter("JobExists").config.delay # -> 5 - waiter = waiters.get_waiter( - "JobExists" - ) # -> botocore.waiter.Batch.Waiter.JobExists object + waiter = waiters.get_waiter("JobExists") # -> botocore.waiter.Batch.Waiter.JobExists object waiter.config.delay = 10 waiters.get_waiter("JobExists").config.delay # -> 5 as defined by waiter_model # To use a specific waiter, update the config and call the `wait()` method for jobId, e.g. - waiter = waiters.get_waiter( - "JobExists" - ) # -> botocore.waiter.Batch.Waiter.JobExists object + waiter = waiters.get_waiter("JobExists") # -> botocore.waiter.Batch.Waiter.JobExists object waiter.config.delay = random.uniform(1, 10) # seconds waiter.config.max_attempts = 10 waiter.wait(jobs=[jobId]) diff --git a/airflow/providers/amazon/aws/log/cloudwatch_task_handler.py b/airflow/providers/amazon/aws/log/cloudwatch_task_handler.py index c975a2cb83fc6..7d4f81006b380 100644 --- a/airflow/providers/amazon/aws/log/cloudwatch_task_handler.py +++ b/airflow/providers/amazon/aws/log/cloudwatch_task_handler.py @@ -17,6 +17,7 @@ # under the License. import sys from datetime import datetime +from typing import Optional import watchtower @@ -42,7 +43,7 @@ class CloudwatchTaskHandler(FileTaskHandler, LoggingMixin): :param filename_template: template for file name (local storage) or log stream name (remote) """ - def __init__(self, base_log_folder: str, log_group_arn: str, filename_template: str): + def __init__(self, base_log_folder: str, log_group_arn: str, filename_template: Optional[str] = None): super().__init__(base_log_folder, filename_template) split_arn = log_group_arn.split(':') diff --git a/airflow/providers/amazon/aws/log/s3_task_handler.py b/airflow/providers/amazon/aws/log/s3_task_handler.py index 695c4623d97b2..0abea94c665c0 100644 --- a/airflow/providers/amazon/aws/log/s3_task_handler.py +++ b/airflow/providers/amazon/aws/log/s3_task_handler.py @@ -17,6 +17,7 @@ # under the License. import os import sys +from typing import Optional if sys.version_info >= (3, 8): from functools import cached_property @@ -35,7 +36,7 @@ class S3TaskHandler(FileTaskHandler, LoggingMixin): uploads to and reads from S3 remote storage. 
""" - def __init__(self, base_log_folder: str, s3_log_folder: str, filename_template: str): + def __init__(self, base_log_folder: str, s3_log_folder: str, filename_template: Optional[str] = None): super().__init__(base_log_folder, filename_template) self.remote_base = s3_log_folder self.log_relative_path = '' diff --git a/airflow/providers/arangodb/example_dags/example_arangodb.py b/airflow/providers/arangodb/example_dags/example_arangodb.py index f9da187cfb665..37a8250dfd16a 100644 --- a/airflow/providers/arangodb/example_dags/example_arangodb.py +++ b/airflow/providers/arangodb/example_dags/example_arangodb.py @@ -41,7 +41,7 @@ # [START howto_aql_sensor_template_file_arangodb] -sensor = AQLSensor( +sensor2 = AQLSensor( task_id="aql_sensor_template_file", query="search_judy.sql", timeout=60, @@ -65,7 +65,7 @@ # [START howto_aql_operator_template_file_arangodb] -operator = AQLOperator( +operator2 = AQLOperator( task_id='aql_operator_template_file', dag=dag, result_processor=lambda cursor: print([document["name"] for document in cursor]), diff --git a/airflow/providers/elasticsearch/example_dags/example_elasticsearch_query.py b/airflow/providers/elasticsearch/example_dags/example_elasticsearch_query.py deleted file mode 100644 index d4a7b100f00f6..0000000000000 --- a/airflow/providers/elasticsearch/example_dags/example_elasticsearch_query.py +++ /dev/null @@ -1,50 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from datetime import datetime, timedelta - -from airflow import DAG -from airflow.decorators import task -from airflow.providers.elasticsearch.hooks.elasticsearch import ElasticsearchHook - - -@task(task_id='es_print_tables') -def show_tables(): - """ - show_tables queries elasticsearch to list available tables - """ - es = ElasticsearchHook(elasticsearch_conn_id='production-es') - - # Handle ES conn with context manager - with es.get_conn() as es_conn: - tables = es_conn.execute('SHOW TABLES') - for table, *_ in tables: - print(f"table: {table}") - return True - - -# Using a DAG context manager, you don't have to specify the dag property of each task -with DAG( - 'elasticsearch_dag', - start_date=datetime(2021, 8, 30), - max_active_runs=1, - schedule_interval=timedelta(days=1), - default_args={'retries': 1}, # Default setting applied to all tasks - catchup=False, -) as dag: - - show_tables() diff --git a/airflow/providers/elasticsearch/log/es_task_handler.py b/airflow/providers/elasticsearch/log/es_task_handler.py index 83c1163d80c87..64fce0df53c15 100644 --- a/airflow/providers/elasticsearch/log/es_task_handler.py +++ b/airflow/providers/elasticsearch/log/es_task_handler.py @@ -18,6 +18,7 @@ import logging import sys +import warnings from collections import defaultdict from datetime import datetime from operator import attrgetter @@ -31,15 +32,22 @@ from elasticsearch_dsl import Search from airflow.configuration import conf -from airflow.models import TaskInstance +from airflow.models.dagrun import DagRun +from airflow.models.taskinstance import TaskInstance from airflow.utils import timezone from airflow.utils.log.file_task_handler import FileTaskHandler from airflow.utils.log.json_formatter import JSONFormatter from airflow.utils.log.logging_mixin import ExternalLoggingMixin, LoggingMixin +from airflow.utils.session import create_session # Elasticsearch hosted log type EsLogMsgType = List[Tuple[str, str]] +# Compatibility: Airflow 2.3.3 and up uses this method, which accesses the +# LogTemplate model to record the log ID template used. If this function does +# not exist, the task handler should use the log_id_template attribute instead. +USE_PER_RUN_LOG_ID = hasattr(DagRun, "get_log_template") + class ElasticsearchTaskHandler(FileTaskHandler, ExternalLoggingMixin, LoggingMixin): """ @@ -65,8 +73,6 @@ class ElasticsearchTaskHandler(FileTaskHandler, ExternalLoggingMixin, LoggingMix def __init__( self, base_log_folder: str, - filename_template: str, - log_id_template: str, end_of_log_mark: str, write_stdout: bool, json_format: bool, @@ -76,6 +82,9 @@ def __init__( host: str = "localhost:9200", frontend: str = "localhost:5601", es_kwargs: Optional[dict] = conf.getsection("elasticsearch_configs"), + *, + filename_template: Optional[str] = None, + log_id_template: Optional[str] = None, ): """ :param base_log_folder: base folder to store logs locally @@ -88,7 +97,13 @@ def __init__( self.client = elasticsearch.Elasticsearch([host], **es_kwargs) # type: ignore[attr-defined] - self.log_id_template = log_id_template + if USE_PER_RUN_LOG_ID and log_id_template is not None: + warnings.warn( + "Passing log_id_template to ElasticsearchTaskHandler is deprecated and has no effect", + DeprecationWarning, + ) + + self.log_id_template = log_id_template # Only used on Airflow < 2.3.2. 
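+        # When USE_PER_RUN_LOG_ID is True, _render_log_id below ignores this attribute
+        # and reads the template from the DagRun's LogTemplate row instead.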
self.frontend = frontend self.mark_end_on_close = True self.end_of_log_mark = end_of_log_mark @@ -103,7 +118,13 @@ def __init__( self.handler: Union[logging.FileHandler, logging.StreamHandler] # type: ignore[assignment] def _render_log_id(self, ti: TaskInstance, try_number: int) -> str: - dag_run = ti.get_dagrun() + with create_session() as session: + dag_run = ti.get_dagrun(session=session) + if USE_PER_RUN_LOG_ID: + log_id_template = dag_run.get_log_template(session=session).elasticsearch_id + else: + log_id_template = self.log_id_template + dag = ti.task.dag assert dag is not None # For Mypy. try: @@ -126,7 +147,7 @@ def _render_log_id(self, ti: TaskInstance, try_number: int) -> str: data_interval_end = "" execution_date = dag_run.execution_date.isoformat() - return self.log_id_template.format( + return log_id_template.format( dag_id=ti.dag_id, task_id=ti.task_id, run_id=getattr(ti, "run_id", ""), diff --git a/airflow/providers/google/CHANGELOG.rst b/airflow/providers/google/CHANGELOG.rst index 2023490d984dd..d96096c4dab65 100644 --- a/airflow/providers/google/CHANGELOG.rst +++ b/airflow/providers/google/CHANGELOG.rst @@ -711,8 +711,7 @@ now the snake_case convention is used. set_acl_permission = GCSBucketCreateAclEntryOperator( task_id="gcs-set-acl-permission", bucket=BUCKET_NAME, - entity="user-{{ task_instance.xcom_pull('get-instance')['persistenceIamIdentity']" - ".split(':', 2)[1] }}", + entity="user-{{ task_instance.xcom_pull('get-instance')['persistenceIamIdentity'].split(':', 2)[1] }}", role="OWNER", ) diff --git a/airflow/providers/google/cloud/example_dags/example_automl_tables.py b/airflow/providers/google/cloud/example_dags/example_automl_tables.py index 9ba0314dae777..c13de99fa8512 100644 --- a/airflow/providers/google/cloud/example_dags/example_automl_tables.py +++ b/airflow/providers/google/cloud/example_dags/example_automl_tables.py @@ -204,14 +204,14 @@ def get_target_column_spec(columns_specs: List[Dict], column_name: str) -> str: catchup=False, user_defined_macros={"extract_object_id": extract_object_id}, ) as example_dag: - create_dataset_task = AutoMLCreateDatasetOperator( + create_dataset_task2 = AutoMLCreateDatasetOperator( task_id="create_dataset_task", dataset=DATASET, location=GCP_AUTOML_LOCATION, project_id=GCP_PROJECT_ID, ) - dataset_id = create_dataset_task.output['dataset_id'] + dataset_id = create_dataset_task2.output['dataset_id'] import_dataset_task = AutoMLImportDataOperator( task_id="import_dataset_task", diff --git a/airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py b/airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py index be858c4018753..bf73959d4ff72 100644 --- a/airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py +++ b/airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py @@ -17,17 +17,15 @@ # under the License. """ -Example Airflow DAG that demonstrates interactions with Google Cloud Transfer. +Example Airflow DAG that demonstrates interactions with Google Cloud Transfer. This DAG relies on +the following OS environment variables - -This DAG relies on the following OS environment variables +Note that you need to provide a large enough set of data so that operations do not execute too quickly. +Otherwise, DAG will fail. * GCP_PROJECT_ID - Google Cloud Project to use for the Google Cloud Transfer Service. 
* GCP_DESCRIPTION - Description of transfer job * GCP_TRANSFER_SOURCE_AWS_BUCKET - Amazon Web Services Storage bucket from which files are copied. - .. warning:: - You need to provide a large enough set of data so that operations do not execute too quickly. - Otherwise, DAG will fail. * GCP_TRANSFER_SECOND_TARGET_BUCKET - Google Cloud Storage bucket to which files are copied * WAIT_FOR_OPERATION_POKE_INTERVAL - interval of what to check the status of the operation A smaller value than the default value accelerates the system test and ensures its correct execution with diff --git a/airflow/providers/google/cloud/example_dags/example_pubsub.py b/airflow/providers/google/cloud/example_dags/example_pubsub.py index 8e3dd1fe8f01e..05ae16bd68f35 100644 --- a/airflow/providers/google/cloud/example_dags/example_pubsub.py +++ b/airflow/providers/google/cloud/example_dags/example_pubsub.py @@ -56,7 +56,7 @@ catchup=False, ) as example_sensor_dag: # [START howto_operator_gcp_pubsub_create_topic] - create_topic = PubSubCreateTopicOperator( + create_topic1 = PubSubCreateTopicOperator( task_id="create_topic", topic=TOPIC_FOR_SENSOR_DAG, project_id=GCP_PROJECT_ID, fail_if_exists=False ) # [END howto_operator_gcp_pubsub_create_topic] @@ -105,7 +105,7 @@ ) # [END howto_operator_gcp_pubsub_delete_topic] - create_topic >> subscribe_task >> publish_task + create_topic1 >> subscribe_task >> publish_task pull_messages >> pull_messages_result >> unsubscribe_task >> delete_topic # Task dependencies created via `XComArgs`: @@ -120,7 +120,7 @@ catchup=False, ) as example_operator_dag: # [START howto_operator_gcp_pubsub_create_topic] - create_topic = PubSubCreateTopicOperator( + create_topic2 = PubSubCreateTopicOperator( task_id="create_topic", topic=TOPIC_FOR_OPERATOR_DAG, project_id=GCP_PROJECT_ID ) # [END howto_operator_gcp_pubsub_create_topic] @@ -170,7 +170,7 @@ # [END howto_operator_gcp_pubsub_delete_topic] ( - create_topic + create_topic2 >> subscribe_task >> publish_task >> pull_messages_operator diff --git a/airflow/providers/google/cloud/example_dags/example_vertex_ai.py b/airflow/providers/google/cloud/example_dags/example_vertex_ai.py index cded48ae9b4de..a421c31a7ee2c 100644 --- a/airflow/providers/google/cloud/example_dags/example_vertex_ai.py +++ b/airflow/providers/google/cloud/example_dags/example_vertex_ai.py @@ -26,15 +26,16 @@ This DAG relies on the following OS environment variables: * GCP_VERTEX_AI_BUCKET - Google Cloud Storage bucket where the model will be saved -after training process was finished. + after training process was finished. * CUSTOM_CONTAINER_URI - path to container with model. * PYTHON_PACKAGE_GSC_URI - path to test model in archive. * LOCAL_TRAINING_SCRIPT_PATH - path to local training script. * DATASET_ID - ID of dataset which will be used in training process. * MODEL_ID - ID of model which will be used in predict process. * MODEL_ARTIFACT_URI - The artifact_uri should be the path to a GCS directory containing saved model -artifacts. + artifacts. 
""" + import os from datetime import datetime from uuid import uuid4 diff --git a/airflow/providers/google/cloud/log/gcs_task_handler.py b/airflow/providers/google/cloud/log/gcs_task_handler.py index 92d133d109af5..81f1426d75154 100644 --- a/airflow/providers/google/cloud/log/gcs_task_handler.py +++ b/airflow/providers/google/cloud/log/gcs_task_handler.py @@ -67,7 +67,7 @@ def __init__( *, base_log_folder: str, gcs_log_folder: str, - filename_template: str, + filename_template: Optional[str] = None, gcp_key_path: Optional[str] = None, gcp_keyfile_dict: Optional[dict] = None, gcp_scopes: Optional[Collection[str]] = _DEFAULT_SCOPESS, diff --git a/airflow/providers/google/common/auth_backend/google_openid.py b/airflow/providers/google/common/auth_backend/google_openid.py index 496ac29616686..a267c0e63a1ca 100644 --- a/airflow/providers/google/common/auth_backend/google_openid.py +++ b/airflow/providers/google/common/auth_backend/google_openid.py @@ -88,7 +88,7 @@ def _verify_id_token(id_token: str) -> Optional[str]: def _lookup_user(user_email: str): - security_manager = current_app.appbuilder.sm + security_manager = current_app.appbuilder.sm # type: ignore[attr-defined] user = security_manager.find_user(email=user_email) if not user: diff --git a/airflow/providers/microsoft/azure/log/wasb_task_handler.py b/airflow/providers/microsoft/azure/log/wasb_task_handler.py index 9ec0cdf646fc4..f5e89c2c21a68 100644 --- a/airflow/providers/microsoft/azure/log/wasb_task_handler.py +++ b/airflow/providers/microsoft/azure/log/wasb_task_handler.py @@ -44,8 +44,9 @@ def __init__( base_log_folder: str, wasb_log_folder: str, wasb_container: str, - filename_template: str, delete_local_copy: str, + *, + filename_template: Optional[str] = None, ) -> None: super().__init__(base_log_folder, filename_template) self.wasb_container = wasb_container diff --git a/airflow/providers/mongo/hooks/mongo.py b/airflow/providers/mongo/hooks/mongo.py index 96a5ec800302a..c022ec4135ebd 100644 --- a/airflow/providers/mongo/hooks/mongo.py +++ b/airflow/providers/mongo/hooks/mongo.py @@ -266,7 +266,7 @@ def replace_many( :param mongo_collection: The name of the collection to update. :param docs: The new documents. :param filter_docs: A list of queries that match the documents to replace. - Can be omitted; then the _id fields from docs will be used. + Can be omitted; then the _id fields from airflow.docs will be used. :param mongo_db: The name of the database to use. Can be omitted; then the database from the connection string is used. :param upsert: If ``True``, perform an insert if no documents diff --git a/airflow/providers/oracle/hooks/oracle.py b/airflow/providers/oracle/hooks/oracle.py index 84c0f5d6a1e33..14e2f81cafba7 100644 --- a/airflow/providers/oracle/hooks/oracle.py +++ b/airflow/providers/oracle/hooks/oracle.py @@ -72,12 +72,7 @@ def get_conn(self) -> 'OracleHook': .. 
code-block:: python - { - "dsn": ( - "(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)" - "(HOST=host)(PORT=1521))(CONNECT_DATA=(SID=sid)))" - ) - } + {"dsn": ("(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=host)(PORT=1521))(CONNECT_DATA=(SID=sid)))")} see more param detail in `cx_Oracle.connect `_ diff --git a/airflow/security/kerberos.py b/airflow/security/kerberos.py index e8fc86af7259c..55ad22d8ce62d 100644 --- a/airflow/security/kerberos.py +++ b/airflow/security/kerberos.py @@ -34,13 +34,13 @@ """Kerberos security provider""" import logging import shlex -import socket import subprocess import sys import time from typing import List, Optional from airflow.configuration import conf +from airflow.utils.net import get_hostname NEED_KRB181_WORKAROUND = None # type: Optional[bool] @@ -60,7 +60,7 @@ def renew_from_kt(principal: Optional[str], keytab: str, exit_on_fail: bool = Tr renewal_lifetime = f"{conf.getint('kerberos', 'reinit_frequency')}m" cmd_principal = principal or conf.get_mandatory_value('kerberos', 'principal').replace( - "_HOST", socket.getfqdn() + "_HOST", get_hostname() ) if conf.getboolean('kerberos', 'forwardable'): @@ -143,7 +143,7 @@ def perform_krb181_workaround(principal: str): ret = subprocess.call(cmdv, close_fds=True) if ret != 0: - principal = f"{principal or conf.get('kerberos', 'principal')}/{socket.getfqdn()}" + principal = f"{principal or conf.get('kerberos', 'principal')}/{get_hostname()}" ccache = conf.get('kerberos', 'ccache') log.error( "Couldn't renew kerberos ticket in order to work around Kerberos 1.8.1 issue. Please check that " diff --git a/airflow/security/permissions.py b/airflow/security/permissions.py index 2d5c0b939976e..2d02c773b43ff 100644 --- a/airflow/security/permissions.py +++ b/airflow/security/permissions.py @@ -66,14 +66,15 @@ DAG_ACTIONS = {ACTION_CAN_READ, ACTION_CAN_EDIT, ACTION_CAN_DELETE} -def resource_name_for_dag(dag_id): - """Returns the resource name for a DAG id.""" - if dag_id == RESOURCE_DAG: - return dag_id +def resource_name_for_dag(root_dag_id: str) -> str: + """Returns the resource name for a DAG id. - if dag_id.startswith(RESOURCE_DAG_PREFIX): - return dag_id - - # To account for SubDags - root_dag_id = dag_id.split(".")[0] + Note that since a sub-DAG should follow the permission of its + parent DAG, you should pass ``DagModel.root_dag_id`` to this function, + for a subdag. A normal dag should pass the ``DagModel.dag_id``. 
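+
+    For example (illustrative, assuming ``RESOURCE_DAG_PREFIX`` is ``"DAG:"``)::
+
+        resource_name_for_dag("example_dag")      # -> "DAG:example_dag"
+        resource_name_for_dag("DAG:example_dag")  # -> "DAG:example_dag" (already a resource name)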
+ """ + if root_dag_id == RESOURCE_DAG: + return root_dag_id + if root_dag_id.startswith(RESOURCE_DAG_PREFIX): + return root_dag_id return f"{RESOURCE_DAG_PREFIX}{root_dag_id}" diff --git a/airflow/sensors/external_task.py b/airflow/sensors/external_task.py index 40c0a7a5665b7..30c27c7214dc7 100644 --- a/airflow/sensors/external_task.py +++ b/airflow/sensors/external_task.py @@ -23,7 +23,11 @@ from sqlalchemy import func from airflow.exceptions import AirflowException -from airflow.models import BaseOperatorLink, DagBag, DagModel, DagRun, TaskInstance +from airflow.models.baseoperator import BaseOperatorLink +from airflow.models.dag import DagModel +from airflow.models.dagbag import DagBag +from airflow.models.dagrun import DagRun +from airflow.models.taskinstance import TaskInstance from airflow.operators.empty import EmptyOperator from airflow.sensors.base import BaseSensorOperator from airflow.utils.helpers import build_airflow_url_with_query diff --git a/airflow/sensors/weekday.py b/airflow/sensors/weekday.py index bdf9275e107b5..5bb4db646f7c4 100644 --- a/airflow/sensors/weekday.py +++ b/airflow/sensors/weekday.py @@ -15,6 +15,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import warnings from airflow.sensors.base import BaseSensorOperator from airflow.utils import timezone @@ -33,7 +34,7 @@ class DayOfWeekSensor(BaseSensorOperator): weekend_check = DayOfWeekSensor( task_id='weekend_check', week_day='Saturday', - use_task_execution_day=True, + use_task_logical_date=True, dag=dag) **Example** (with multiple day using set): :: @@ -41,7 +42,7 @@ class DayOfWeekSensor(BaseSensorOperator): weekend_check = DayOfWeekSensor( task_id='weekend_check', week_day={'Saturday', 'Sunday'}, - use_task_execution_day=True, + use_task_logical_date=True, dag=dag) **Example** (with :class:`~airflow.utils.weekday.WeekDay` enum): :: @@ -52,7 +53,7 @@ class DayOfWeekSensor(BaseSensorOperator): weekend_check = DayOfWeekSensor( task_id='weekend_check', week_day={WeekDay.SATURDAY, WeekDay.SUNDAY}, - use_task_execution_day=True, + use_task_logical_date=True, dag=dag) :param week_day: Day of the week to check (full name). Optionally, a set @@ -64,16 +65,23 @@ class DayOfWeekSensor(BaseSensorOperator): * ``{WeekDay.TUESDAY}`` * ``{WeekDay.SATURDAY, WeekDay.SUNDAY}`` - :param use_task_execution_day: If ``True``, uses task's execution day to compare + :param use_task_logical_date: If ``True``, uses task's logical date to compare with week_day. Execution Date is Useful for backfilling. If ``False``, uses system's day of the week. Useful when you don't want to run anything on weekdays on the system. """ - def __init__(self, *, week_day, use_task_execution_day=False, **kwargs): + def __init__(self, *, week_day, use_task_logical_date=False, use_task_execution_day=False, **kwargs): super().__init__(**kwargs) self.week_day = week_day - self.use_task_execution_day = use_task_execution_day + self.use_task_logical_date = use_task_logical_date + if use_task_execution_day: + self.use_task_logical_date = use_task_execution_day + warnings.warn( + "Parameter ``use_task_execution_day`` is deprecated. 
Use ``use_task_logical_date``.", + DeprecationWarning, + stacklevel=2, + ) self._week_day_num = WeekDay.validate_week_day(week_day) def poke(self, context: Context): @@ -82,7 +90,7 @@ def poke(self, context: Context): self.week_day, WeekDay(timezone.utcnow().isoweekday()).name, ) - if self.use_task_execution_day: + if self.use_task_logical_date: return context['logical_date'].isoweekday() in self._week_day_num else: return timezone.utcnow().isoweekday() in self._week_day_num diff --git a/airflow/utils/airflow_flask_app.py b/airflow/utils/airflow_flask_app.py new file mode 100644 index 0000000000000..a14ff99398d21 --- /dev/null +++ b/airflow/utils/airflow_flask_app.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, List, cast + +from flask import Flask + +from airflow.models.dagbag import DagBag +from airflow.www.extensions.init_appbuilder import AirflowAppBuilder + + +class AirflowApp(Flask): + """Airflow Flask Application""" + + appbuilder: AirflowAppBuilder + dag_bag: DagBag + api_auth: List[Any] + + +def get_airflow_app() -> AirflowApp: + from flask import current_app + + return cast(AirflowApp, current_app) diff --git a/airflow/utils/db.py b/airflow/utils/db.py index 46b1968970112..7bdd33fb936be 100644 --- a/airflow/utils/db.py +++ b/airflow/utils/db.py @@ -93,6 +93,7 @@ "2.3.0": "b1b348e02d07", "2.3.1": "1de7bc13c950", "2.3.2": "3c94c427fdf6", + "2.3.3": "f5fcbda3e651", } @@ -798,6 +799,21 @@ def log_template_exists(): filename = conf.get("logging", "log_filename_template") elasticsearch_id = conf.get("elasticsearch", "log_id_template") + # First check if we have an empty table. If so, and the default values exist, + # we will seed the table with the values from pre 2.3.0, so old logs will + # still be retrievable. + if not session.query(LogTemplate.id).first(): + is_default_log_id = elasticsearch_id == conf.airflow_defaults.get("elasticsearch", "log_id_template") + is_default_filename = filename == conf.airflow_defaults.get("logging", "log_filename_template") + if is_default_log_id and is_default_filename: + session.add( + LogTemplate( + filename="{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log", + elasticsearch_id="{dag_id}-{task_id}-{execution_date}-{try_number}", + ) + ) + session.flush() + # Before checking if the _current_ value exists, we need to check if the old config value we upgraded in # place exists! pre_upgrade_filename = conf.upgraded_values.get(("logging", "log_filename_template"), filename) @@ -861,7 +877,6 @@ def reflect_tables(tables: List[Union[Base, str]], session): This function gets the current state of each table in the set of models provided and returns a SqlAlchemy metadata object containing them. 
""" - import sqlalchemy.schema metadata = sqlalchemy.schema.MetaData(session.bind) @@ -1173,7 +1188,6 @@ def _move_duplicate_data_to_new_table( building the DELETE FROM join condition. :param target_table_name: name of the table in which to park the duplicate rows """ - bind = session.get_bind() dialect_name = bind.dialect.name query = ( diff --git a/airflow/utils/json.py b/airflow/utils/json.py index 9fc649598201d..99ab607c03200 100644 --- a/airflow/utils/json.py +++ b/airflow/utils/json.py @@ -16,11 +16,14 @@ # specific language governing permissions and limitations # under the License. +import logging from datetime import date, datetime from decimal import Decimal from flask.json import JSONEncoder +from airflow.utils.timezone import convert_to_utc, is_naive + try: import numpy as np except ImportError: @@ -33,6 +36,8 @@ # Dates and JSON encoding/decoding +log = logging.getLogger(__name__) + class AirflowJsonEncoder(JSONEncoder): """Custom Airflow json encoder implementation.""" @@ -45,7 +50,9 @@ def __init__(self, *args, **kwargs): def _default(obj): """Convert dates and numpy objects in a json serializable format.""" if isinstance(obj, datetime): - return obj.strftime('%Y-%m-%dT%H:%M:%SZ') + if is_naive(obj): + obj = convert_to_utc(obj) + return obj.isoformat() elif isinstance(obj, date): return obj.strftime('%Y-%m-%d') elif isinstance(obj, Decimal): @@ -81,6 +88,21 @@ def _default(obj): elif k8s is not None and isinstance(obj, (k8s.V1Pod, k8s.V1ResourceRequirements)): from airflow.kubernetes.pod_generator import PodGenerator - return PodGenerator.serialize_pod(obj) + def safe_get_name(pod): + """ + We're running this in an except block, so we don't want it to + fail under any circumstances, e.g. by accessing an attribute that isn't there + """ + try: + return pod.metadata.name + except Exception: + return None + + try: + return PodGenerator.serialize_pod(obj) + except Exception: + log.warning("JSON encoding failed for pod %s", safe_get_name(obj)) + log.debug("traceback for pod JSON encode error", exc_info=True) + return {} raise TypeError(f"Object of type '{obj.__class__.__name__}' is not JSON serializable") diff --git a/airflow/utils/jwt_signer.py b/airflow/utils/jwt_signer.py new file mode 100644 index 0000000000000..e767997ebeb78 --- /dev/null +++ b/airflow/utils/jwt_signer.py @@ -0,0 +1,80 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from datetime import datetime, timedelta +from typing import Any, Dict + +import jwt + + +class JWTSigner: + """ + Signs and verifies JWT Token. Used to authorise and verify requests. 
+ + :param secret_key: key used to sign the request + :param expiration_time_in_seconds: time after which the token becomes invalid (in seconds) + :param audience: audience that the request is expected to have + :param leeway_in_seconds: leeway that allows for a small clock skew between the two parties + :param algorithm: algorithm used for signing + """ + + def __init__( + self, + secret_key: str, + expiration_time_in_seconds: int, + audience: str, + leeway_in_seconds: int = 5, + algorithm: str = "HS512", + ): + self._secret_key = secret_key + self._expiration_time_in_seconds = expiration_time_in_seconds + self._audience = audience + self._leeway_in_seconds = leeway_in_seconds + self._algorithm = algorithm + + def generate_signed_token(self, extra_payload: Dict[str, Any]) -> str: + """ + Generate JWT with extra payload added. + :param extra_payload: extra payload that is added to the signed token + :return: signed token + """ + jwt_dict = { + "aud": self._audience, + "iat": datetime.utcnow(), + "nbf": datetime.utcnow(), + "exp": datetime.utcnow() + timedelta(seconds=self._expiration_time_in_seconds), + } + jwt_dict.update(extra_payload) + token = jwt.encode( + jwt_dict, + self._secret_key, + algorithm=self._algorithm, + ) + return token + + def verify_token(self, token: str) -> Dict[str, Any]: + payload = jwt.decode( + token, + self._secret_key, + leeway=timedelta(seconds=self._leeway_in_seconds), + algorithms=[self._algorithm], + options={ + "verify_signature": True, + "require": ["exp", "iat", "nbf"], + }, + audience=self._audience, + ) + return payload diff --git a/airflow/utils/log/file_task_handler.py b/airflow/utils/log/file_task_handler.py index e0561991b255f..2c53529a72dc0 100644 --- a/airflow/utils/log/file_task_handler.py +++ b/airflow/utils/log/file_task_handler.py @@ -18,16 +18,17 @@ """File logging handler for tasks.""" import logging import os +import warnings from datetime import datetime from pathlib import Path from typing import TYPE_CHECKING, Optional, Tuple -from itsdangerous import TimedJSONWebSignatureSerializer - from airflow.configuration import AirflowConfigException, conf from airflow.utils.context import Context from airflow.utils.helpers import parse_template_string, render_template_to_string +from airflow.utils.jwt_signer import JWTSigner from airflow.utils.log.non_caching_file_handler import NonCachingFileHandler +from airflow.utils.session import create_session if TYPE_CHECKING: from airflow.models import TaskInstance @@ -44,11 +45,15 @@ class FileTaskHandler(logging.Handler): :param filename_template: template filename string """ - def __init__(self, base_log_folder: str, filename_template: str): + def __init__(self, base_log_folder: str, filename_template: Optional[str] = None): super().__init__() self.handler: Optional[logging.FileHandler] = None self.local_base = base_log_folder - self.filename_template, self.filename_jinja_template = parse_template_string(filename_template) + if filename_template is not None: + warnings.warn( + "Passing filename_template to FileTaskHandler is deprecated and has no effect", + DeprecationWarning, + ) def set_context(self, ti: "TaskInstance"): """ @@ -75,15 +80,19 @@ def close(self): self.handler.close() def _render_filename(self, ti: "TaskInstance", try_number: int) -> str: - if self.filename_jinja_template: + with create_session() as session: + dag_run = ti.get_dagrun(session=session) + template = dag_run.get_log_template(session=session).filename + str_tpl, jinja_tpl = parse_template_string(template) + + if jinja_tpl: 
if hasattr(ti, "task"): context = ti.get_template_context() else: - context = Context(ti=ti, ts=ti.get_dagrun().logical_date.isoformat()) + context = Context(ti=ti, ts=dag_run.logical_date.isoformat()) context["try_number"] = try_number - return render_template_to_string(self.filename_jinja_template, context) - elif self.filename_template: - dag_run = ti.get_dagrun() + return render_template_to_string(jinja_tpl, context) + elif str_tpl: dag = ti.task.dag assert dag is not None # For Mypy. try: @@ -98,7 +107,7 @@ def _render_filename(self, ti: "TaskInstance", try_number: int) -> str: data_interval_end = data_interval[1].isoformat() else: data_interval_end = "" - return self.filename_template.format( + return str_tpl.format( dag_id=ti.dag_id, task_id=ti.task_id, run_id=ti.run_id, @@ -191,16 +200,17 @@ def _read(self, ti, try_number, metadata=None): except (AirflowConfigException, ValueError): pass - signer = TimedJSONWebSignatureSerializer( + signer = JWTSigner( secret_key=conf.get('webserver', 'secret_key'), - algorithm_name='HS512', - expires_in=conf.getint('webserver', 'log_request_clock_grace', fallback=30), - # This isn't really a "salt", more of a signing context - salt='task-instance-logs', + expiration_time_in_seconds=conf.getint( + 'webserver', 'log_request_clock_grace', fallback=30 + ), + audience="task-instance-logs", ) - response = httpx.get( - url, timeout=timeout, headers={'Authorization': signer.dumps(log_relative_path)} + url, + timeout=timeout, + headers={b'Authorization': signer.generate_signed_token({"filename": log_relative_path})}, ) response.encoding = "utf-8" diff --git a/airflow/utils/log/log_reader.py b/airflow/utils/log/log_reader.py index 396ab90a324f5..f241c22df188d 100644 --- a/airflow/utils/log/log_reader.py +++ b/airflow/utils/log/log_reader.py @@ -121,6 +121,6 @@ def render_log_filename( attachment_filename = render_log_filename( ti=ti, try_number="all" if try_number is None else try_number, - filename_template=dagrun.get_log_filename_template(session=session), + filename_template=dagrun.get_log_template(session=session).filename, ) return attachment_filename diff --git a/airflow/utils/serve_logs.py b/airflow/utils/serve_logs.py index 50fdb47a024a6..e14162178b182 100644 --- a/airflow/utils/serve_logs.py +++ b/airflow/utils/serve_logs.py @@ -16,55 +16,89 @@ # under the License. 
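Before the serve_logs rewrite below, a minimal sketch of the signed log-request round trip that the FileTaskHandler change above and the validate_pre_signed_url hook below now perform; it uses only the JWTSigner API added in this diff, and the secret key and log path are illustrative values:

    from airflow.utils.jwt_signer import JWTSigner

    signer = JWTSigner(
        secret_key="illustrative-secret",   # normally conf.get('webserver', 'secret_key')
        expiration_time_in_seconds=30,      # normally [webserver] log_request_clock_grace
        audience="task-instance-logs",
    )

    # Requesting side (FileTaskHandler._read): sign the relative log path into the token.
    token = signer.generate_signed_token({"filename": "my_dag/my_task/run_id/1.log"})

    # Serving side (serve_logs): verify signature, audience and time claims, then compare
    # the "filename" claim against the requested path before returning the file.
    payload = signer.verify_token(token)
    assert payload["filename"] == "my_dag/my_task/run_id/1.log"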
"""Serve logs process""" +import logging import os -import time import gunicorn.app.base from flask import Flask, abort, request, send_from_directory -from itsdangerous import TimedJSONWebSignatureSerializer +from jwt.exceptions import ( + ExpiredSignatureError, + ImmatureSignatureError, + InvalidAudienceError, + InvalidIssuedAtError, + InvalidSignatureError, +) from setproctitle import setproctitle from airflow.configuration import conf +from airflow.utils.docs import get_docs_url +from airflow.utils.jwt_signer import JWTSigner + +logger = logging.getLogger(__name__) def create_app(): flask_app = Flask(__name__, static_folder=None) - max_request_age = conf.getint('webserver', 'log_request_clock_grace', fallback=30) + expiration_time_in_seconds = conf.getint('webserver', 'log_request_clock_grace', fallback=30) log_directory = os.path.expanduser(conf.get('logging', 'BASE_LOG_FOLDER')) - signer = TimedJSONWebSignatureSerializer( + signer = JWTSigner( secret_key=conf.get('webserver', 'secret_key'), - algorithm_name='HS512', - expires_in=max_request_age, - # This isn't really a "salt", more of a signing context - salt='task-instance-logs', + expiration_time_in_seconds=expiration_time_in_seconds, + audience="task-instance-logs", ) # Prevent direct access to the logs port @flask_app.before_request def validate_pre_signed_url(): try: - auth = request.headers['Authorization'] - - # We don't actually care about the payload, just that the signature - # was valid and the `exp` claim is correct - filename, headers = signer.loads(auth, return_header=True) - - issued_at = int(headers['iat']) - expires_at = int(headers['exp']) - except Exception: + auth = request.headers.get('Authorization') + if auth is None: + logger.warning("The Authorization header is missing: %s.", request.headers) + abort(403) + payload = signer.verify_token(auth) + token_filename = payload.get("filename") + request_filename = request.view_args['filename'] + if token_filename is None: + logger.warning("The payload does not contain 'filename' key: %s.", payload) + abort(403) + if token_filename != request_filename: + logger.warning( + "The payload log_relative_path key is different than the one in token:" + "Request path: %s. Token path: %s.", + request_filename, + token_filename, + ) + abort(403) + except InvalidAudienceError: + logger.warning("Invalid audience for the request", exc_info=True) abort(403) - - if filename != request.view_args['filename']: + except InvalidSignatureError: + logger.warning("The signature of the request was wrong", exc_info=True) abort(403) - - # Validate the `iat` and `exp` are within `max_request_age` of now. - now = int(time.time()) - if abs(now - issued_at) > max_request_age: + except ImmatureSignatureError: + logger.warning("The signature of the request was sent from the future", exc_info=True) abort(403) - if abs(now - expires_at) > max_request_age: + except ExpiredSignatureError: + logger.warning( + "The signature of the request has expired. Make sure that all components " + "in your system have synchronized clocks. " + "See more at %s", + get_docs_url("configurations-ref.html#secret-key"), + exc_info=True, + ) abort(403) - if issued_at > expires_at or expires_at - issued_at > max_request_age: + except InvalidIssuedAtError: + logger.warning( + "The request was issues in the future. Make sure that all components " + "in your system have synchronized clocks. 
" + "See more at %s", + get_docs_url("configurations-ref.html#secret-key"), + exc_info=True, + ) + abort(403) + except Exception: + logger.warning("Unknown error", exc_info=True) abort(403) @flask_app.route('/log/') diff --git a/airflow/utils/state.py b/airflow/utils/state.py index 8415dd16667a8..a79169f86169f 100644 --- a/airflow/utils/state.py +++ b/airflow/utils/state.py @@ -154,6 +154,7 @@ def color_fg(cls, state): TaskInstanceState.FAILED, TaskInstanceState.SKIPPED, TaskInstanceState.UPSTREAM_FAILED, + TaskInstanceState.REMOVED, ] ) """ diff --git a/airflow/utils/task_group.py b/airflow/utils/task_group.py index 7b53a521db21b..ed8d380ff0bc8 100644 --- a/airflow/utils/task_group.py +++ b/airflow/utils/task_group.py @@ -24,7 +24,12 @@ import weakref from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Sequence, Set, Tuple, Union -from airflow.exceptions import AirflowDagCycleException, AirflowException, DuplicateTaskIdFound +from airflow.exceptions import ( + AirflowDagCycleException, + AirflowException, + DuplicateTaskIdFound, + TaskAlreadyInTaskGroup, +) from airflow.models.taskmixin import DAGNode, DependencyMixin from airflow.serialization.enums import DagAttributeTypes from airflow.utils.helpers import validate_group_key @@ -186,7 +191,16 @@ def __iter__(self): yield child def add(self, task: DAGNode) -> None: - """Add a task to this TaskGroup.""" + """Add a task to this TaskGroup. + + :meta private: + """ + from airflow.models.abstractoperator import AbstractOperator + + existing_tg = task.task_group + if isinstance(task, AbstractOperator) and existing_tg is not None and existing_tg != self: + raise TaskAlreadyInTaskGroup(task.node_id, existing_tg.node_id, self.node_id) + # Set the TG first, as setting it might change the return value of node_id! 
task.task_group = weakref.proxy(self) key = task.node_id diff --git a/airflow/www/.babelrc b/airflow/www/.babelrc index 7316527d05fe1..24da7a59d69b3 100644 --- a/airflow/www/.babelrc +++ b/airflow/www/.babelrc @@ -1,7 +1,7 @@ { "env": { "test": { - "presets": ["@babel/preset-env", "@babel/preset-react"], + "presets": ["@babel/preset-env", "@babel/preset-react", "@babel/preset-typescript"], "plugins": ["@babel/plugin-transform-runtime"] } } diff --git a/airflow/www/.eslintrc b/airflow/www/.eslintrc index 216c91070ca62..b1a446a31b818 100644 --- a/airflow/www/.eslintrc +++ b/airflow/www/.eslintrc @@ -1,10 +1,65 @@ { "extends": ["airbnb", "airbnb/hooks"], - "parser": "babel-eslint", + "parser": "@babel/eslint-parser", + "parserOptions": { + "babelOptions": { + "presets": ["@babel/preset-env", "@babel/preset-react", "@babel/preset-typescript"], + "plugins": ["@babel/plugin-transform-runtime"] + } + }, "plugins": [ "html", "react" ], "rules": { "no-param-reassign": 1, "react/prop-types": 0, - "react/jsx-props-no-spreading": 0 - } + "react/jsx-props-no-spreading": 0, + "import/extensions": [ + "error", + "ignorePackages", + { + "js": "never", + "jsx": "never", + "ts": "never", + "tsx": "never" + } + ], + "import/no-extraneous-dependencies": [ + "error", + { + "devDependencies": true, + "optionalDependencies": false, + "peerDependencies": false + } + ], + "react/function-component-definition": [ + 0, + { + "namedComponents": "function-declaration" + } + ] + }, + "settings": { + "import/resolver": { + "node": { + "extensions": [".js", ".jsx", ".ts", ".tsx"] + } + } + }, + // eslint that apply only to typescript files + "overrides": [ + { + "files": ["*.ts", "*.tsx"], + "extends": [ + "airbnb-typescript" + ], + "parser": "@typescript-eslint/parser", + "plugins": [ "@typescript-eslint" ], + "parserOptions": { + "project": "./tsconfig.json" + }, + "rules": { + "react/require-default-props": 0, + "@typescript-eslint/no-explicit-any": 1 + } + } + ] } diff --git a/airflow/www/api/experimental/endpoints.py b/airflow/www/api/experimental/endpoints.py index 898988db81c50..75256f13736fd 100644 --- a/airflow/www/api/experimental/endpoints.py +++ b/airflow/www/api/experimental/endpoints.py @@ -70,7 +70,8 @@ def add_deprecation_headers(response: Response): return response -api_experimental.after_request(add_deprecation_headers) +# This API is deprecated. We do not care too much about typing here +api_experimental.after_request(add_deprecation_headers) # type: ignore[arg-type] @api_experimental.route('/dags//dag_runs', methods=['POST']) diff --git a/airflow/www/auth.py b/airflow/www/auth.py index 9d40c00a5cf10..17ac9c2ce44ae 100644 --- a/airflow/www/auth.py +++ b/airflow/www/auth.py @@ -15,13 +15,13 @@ # specific language governing permissions and limitations # under the License. 
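Ahead of the airflow/www/auth.py hunk below: the error page switches from socket.getfqdn() to airflow.utils.net.get_hostname(), which, as far as I can tell, resolves the hostname through the [core] hostname_callable setting and only falls back to the fully qualified domain name. A tiny, purely illustrative sketch of the difference:

    import socket

    from airflow.utils.net import get_hostname

    # getfqdn() always asks the OS resolver; get_hostname() lets deployments plug in
    # their own resolution via [core] hostname_callable and falls back to getfqdn().
    print(socket.getfqdn())
    print(get_hostname())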
-import socket from functools import wraps from typing import Callable, Optional, Sequence, Tuple, TypeVar, cast from flask import current_app, flash, g, redirect, render_template, request, url_for from airflow.configuration import conf +from airflow.utils.net import get_hostname T = TypeVar("T", bound=Callable) @@ -37,7 +37,10 @@ def decorated(*args, **kwargs): appbuilder = current_app.appbuilder dag_id = ( - request.args.get("dag_id") or request.form.get("dag_id") or (request.json or {}).get("dag_id") + request.args.get("dag_id") + or request.form.get("dag_id") + or (request.is_json and request.json.get("dag_id")) + or None ) if appbuilder.sm.check_authorization(permissions, dag_id): return func(*args, **kwargs) @@ -45,7 +48,7 @@ def decorated(*args, **kwargs): return ( render_template( 'airflow/no_roles_permissions.html', - hostname=socket.getfqdn() + hostname=get_hostname() if conf.getboolean('webserver', 'EXPOSE_HOSTNAME', fallback=True) else 'redact', logout_url=appbuilder.get_url_for_logout, diff --git a/airflow/www/extensions/init_jinja_globals.py b/airflow/www/extensions/init_jinja_globals.py index 5b6a5a488ece8..ca78ab4047a25 100644 --- a/airflow/www/extensions/init_jinja_globals.py +++ b/airflow/www/extensions/init_jinja_globals.py @@ -16,13 +16,13 @@ # under the License. import logging -import socket import pendulum import airflow from airflow.configuration import conf from airflow.settings import IS_K8S_OR_K8SCELERY_EXECUTOR, STATE_COLORS +from airflow.utils.net import get_hostname from airflow.utils.platform import get_airflow_git_version @@ -43,7 +43,7 @@ def init_jinja_globals(app): default_ui_timezone = server_timezone expose_hostname = conf.getboolean('webserver', 'EXPOSE_HOSTNAME', fallback=True) - hostname = socket.getfqdn() if expose_hostname else 'redact' + hostname = get_hostname() if expose_hostname else 'redact' try: airflow_version = airflow.__version__ diff --git a/airflow/www/extensions/init_wsgi_middlewares.py b/airflow/www/extensions/init_wsgi_middlewares.py index 0ed78073e92f5..00c04006ff68e 100644 --- a/airflow/www/extensions/init_wsgi_middlewares.py +++ b/airflow/www/extensions/init_wsgi_middlewares.py @@ -37,7 +37,7 @@ def init_wsgi_middleware(flask_app: Flask): base_url = "" if base_url: flask_app.wsgi_app = DispatcherMiddleware( # type: ignore - _root_app, mounts={base_url: flask_app.wsgi_app} + _root_app, mounts={base_url: flask_app.wsgi_app} # type: ignore ) # Apply ProxyFix middleware diff --git a/airflow/www/fab_security/manager.py b/airflow/www/fab_security/manager.py index 8381f7b08cdc7..5cdbe60d39613 100644 --- a/airflow/www/fab_security/manager.py +++ b/airflow/www/fab_security/manager.py @@ -291,7 +291,7 @@ def create_jwt_manager(self, app) -> JWTManager: """ jwt_manager = JWTManager() jwt_manager.init_app(app) - jwt_manager.user_loader_callback_loader(self.load_user_jwt) + jwt_manager.user_lookup_loader(self.load_user_jwt) return jwt_manager def create_builtin_roles(self): @@ -654,6 +654,18 @@ def get_oauth_user_info(self, provider, resp): "email": data.get("email", ""), "role_keys": data.get("groups", []), } + # for Keycloak + if provider in ["keycloak", "keycloak_before_17"]: + me = self.appbuilder.sm.oauth_remotes[provider].get("openid-connect/userinfo") + me.raise_for_status() + data = me.json() + log.debug("User info from Keycloak: %s", data) + return { + "username": data.get("preferred_username", ""), + "first_name": data.get("given_name", ""), + "last_name": data.get("family_name", ""), + "email": data.get("email", ""), + } else: return 
{} @@ -785,7 +797,7 @@ def register_views(self): if self.appbuilder.app.config.get("FAB_ADD_SECURITY_PERMISSION_VIEWS_VIEW", True): self.appbuilder.add_view( self.permissionmodelview, - "Permissions", + "Permission Pairs", icon="fa-link", label=_("Permissions"), category="Security", @@ -1027,12 +1039,6 @@ def auth_user_ldap(self, username, password): try: # LDAP certificate settings - if self.auth_ldap_allow_self_signed: - ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_ALLOW) - ldap.set_option(ldap.OPT_X_TLS_NEWCTX, 0) - elif self.auth_ldap_tls_demand: - ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_DEMAND) - ldap.set_option(ldap.OPT_X_TLS_NEWCTX, 0) if self.auth_ldap_tls_cacertdir: ldap.set_option(ldap.OPT_X_TLS_CACERTDIR, self.auth_ldap_tls_cacertdir) if self.auth_ldap_tls_cacertfile: @@ -1041,6 +1047,12 @@ def auth_user_ldap(self, username, password): ldap.set_option(ldap.OPT_X_TLS_CERTFILE, self.auth_ldap_tls_certfile) if self.auth_ldap_tls_keyfile: ldap.set_option(ldap.OPT_X_TLS_KEYFILE, self.auth_ldap_tls_keyfile) + if self.auth_ldap_allow_self_signed: + ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_ALLOW) + ldap.set_option(ldap.OPT_X_TLS_NEWCTX, 0) + elif self.auth_ldap_tls_demand: + ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_DEMAND) + ldap.set_option(ldap.OPT_X_TLS_NEWCTX, 0) # Initialise LDAP connection con = ldap.initialize(self.auth_ldap_server) @@ -1354,7 +1366,10 @@ def get_user_menu_access(self, menu_names: Optional[List[str]] = None) -> Set[st return self._get_user_permission_resources(g.user, "menu_access", resource_names=menu_names) elif current_user_jwt: return self._get_user_permission_resources( - current_user_jwt, "menu_access", resource_names=menu_names + # the current_user_jwt is a lazy proxy, so we need to ignore type checking + current_user_jwt, # type: ignore[arg-type] + "menu_access", + resource_names=menu_names, ) else: return self._get_user_permission_resources(None, "menu_access", resource_names=menu_names) @@ -1660,9 +1675,9 @@ def load_user(self, user_id): """Load user by ID""" return self.get_user_by_id(int(user_id)) - def load_user_jwt(self, user_id): - """Load user JWT""" - user = self.load_user(user_id) + def load_user_jwt(self, _jwt_header, jwt_data): + identity = jwt_data["sub"] + user = self.load_user(identity) # Set flask g.user to JWT user, we can't do it on before request g.user = user return user diff --git a/airflow/www/jest-setup.js b/airflow/www/jest-setup.js index 5ffa8890d55d3..a4679deed25ea 100644 --- a/airflow/www/jest-setup.js +++ b/airflow/www/jest-setup.js @@ -20,6 +20,21 @@ */ import '@testing-library/jest-dom'; +import axios from 'axios'; +import { setLogger } from 'react-query'; + +axios.defaults.adapter = require('axios/lib/adapters/http'); + +axios.interceptors.response.use( + (res) => res.data || res, +); + +setLogger({ + log: console.log, + warn: console.warn, + // ✅ no more errors on the console + error: () => {}, +}); // Mock global objects we use across the app global.stateColors = { diff --git a/airflow/www/jest.config.js b/airflow/www/jest.config.js index 43501e1c697fc..677bdcc112272 100644 --- a/airflow/www/jest.config.js +++ b/airflow/www/jest.config.js @@ -20,7 +20,7 @@ const config = { verbose: true, transform: { - '^.+\\.jsx?$': 'babel-jest', + '^.+\\.[jt]sx?$': 'babel-jest', }, testEnvironment: 'jsdom', setupFilesAfterEnv: ['./jest-setup.js'], diff --git a/airflow/www/package.json b/airflow/www/package.json index bf9cc10dbe75e..15d237d1cd7a8 100644 --- 
a/airflow/www/package.json +++ b/airflow/www/package.json @@ -2,11 +2,11 @@ "description": "Apache Airflow is a platform to programmatically author, schedule and monitor workflows.", "scripts": { "test": "jest", - "dev": "NODE_ENV=dev webpack --watch --colors --progress --debug --output-pathinfo --devtool eval-cheap-source-map --mode development", - "prod": "NODE_ENV=production node --max_old_space_size=4096 ./node_modules/webpack/bin/webpack.js -p --colors --progress", - "build": "NODE_ENV=production webpack --colors --progress", - "lint": "eslint --ignore-path=.eslintignore --ext .js,.jsx .", - "lint:fix": "eslint --fix --ignore-path=.eslintignore --ext .js,.jsx ." + "dev": "NODE_ENV=development webpack --watch --progress --devtool eval-cheap-source-map --mode development", + "prod": "NODE_ENV=production node --max_old_space_size=4096 ./node_modules/webpack/bin/webpack.js --mode production --progress", + "build": "NODE_ENV=production webpack --progress --mode production", + "lint": "eslint --ignore-path=.eslintignore --ext .js,.jsx,.ts,.tsx . && tsc --noEmit", + "lint:fix": "eslint --fix --ignore-path=.eslintignore --ext .js,.jsx,.ts,.tsx . && tsc --noEmit" }, "author": "Apache", "license": "Apache-2.0", @@ -26,53 +26,57 @@ "flask" ], "devDependencies": { + "@babel/core": "^7.18.5", + "@babel/eslint-parser": "^7.18.2", "@babel/plugin-transform-runtime": "^7.16.0", "@babel/preset-env": "^7.16.0", "@babel/preset-react": "^7.16.0", - "@testing-library/jest-dom": "^5.15.0", - "@testing-library/react": "^12.1.2", - "@testing-library/react-hooks": "^7.0.2", - "babel": "^6.23.0", - "babel-core": "^6.26.3", - "babel-eslint": "^10.1.0", + "@babel/preset-typescript": "^7.17.12", + "@testing-library/jest-dom": "^5.16.0", + "@testing-library/react": "^13.0.0", + "@types/react": "^18.0.12", + "@types/react-dom": "^18.0.5", + "@typescript-eslint/eslint-plugin": "^5.13.0", + "@typescript-eslint/parser": "^5.0.0", "babel-jest": "^27.3.1", "babel-loader": "^8.1.0", - "babel-plugin-css-modules-transform": "^1.6.1", - "babel-polyfill": "^6.26.0", "clean-webpack-plugin": "^3.0.0", "copy-webpack-plugin": "^6.0.3", - "css-loader": "^3.4.2", - "eslint": "^7.2.0", - "eslint-config-airbnb": "18.2.1", + "css-loader": "5.2.7", + "css-minimizer-webpack-plugin": "^4.0.0", + "eslint": "^8.6.0", + "eslint-config-airbnb": "^19.0.4", + "eslint-config-airbnb-typescript": "^17.0.0", "eslint-plugin-html": "^6.0.2", - "eslint-plugin-import": "^2.22.1", - "eslint-plugin-jsx-a11y": "^6.4.1", + "eslint-plugin-import": "^2.25.3", + "eslint-plugin-jsx-a11y": "^6.5.0", "eslint-plugin-node": "^11.1.0", "eslint-plugin-promise": "^4.2.1", - "eslint-plugin-react": "^7.21.5", - "eslint-plugin-react-hooks": "^1.7.0", + "eslint-plugin-react": "^7.30.0", + "eslint-plugin-react-hooks": "^4.5.0", "eslint-plugin-standard": "^4.0.1", "file-loader": "^6.0.0", "imports-loader": "^1.1.0", "jest": "^27.3.1", - "mini-css-extract-plugin": "1.6.0", - "moment": "^2.29.2", + "mini-css-extract-plugin": "^1.6.2", + "moment": "^2.29.3", "moment-locales-webpack-plugin": "^1.2.0", - "optimize-css-assets-webpack-plugin": "6.0.0", + "nock": "^13.2.4", "style-loader": "^1.2.1", "stylelint": "^13.6.1", "stylelint-config-standard": "^20.0.0", "terser-webpack-plugin": "<5.0.0", + "typescript": "^4.6.3", "url-loader": "4.1.0", - "webpack": "4.44.2", - "webpack-cli": "^3.3.12", + "webpack": "^5.73.0", + "webpack-cli": "^4.0.0", "webpack-license-plugin": "^4.2.1", - "webpack-manifest-plugin": "^2.2.0" + "webpack-manifest-plugin": "^4.0.0" }, "dependencies": { - 
"@chakra-ui/react": "^1.8.3", - "@emotion/cache": "^11.4.0", - "@emotion/react": "^11.4.1", + "@chakra-ui/react": "^2.2.0", + "@emotion/cache": "^11.9.3", + "@emotion/react": "^11.9.3", "@emotion/styled": "^11", "axios": "^0.26.0", "bootstrap-3-typeahead": "^4.0.2", @@ -85,32 +89,19 @@ "datatables.net": "^1.11.4", "datatables.net-bs": "^1.11.4", "eonasdan-bootstrap-datetimepicker": "^4.17.47", - "framer-motion": "^4", + "framer-motion": "^6.0.0", "jquery": ">=3.5.0", "jshint": "^2.13.4", "lodash": "^4.17.21", - "moment-timezone": "^0.5.28", + "moment-timezone": "^0.5.34", "nvd3": "^1.8.6", - "react": "^17.0.2", - "react-dom": "^17.0.2", + "react": "^18.0.0", + "react-dom": "^18.0.0", "react-icons": "^4.3.1", - "react-query": "^3.34.16", + "react-query": "^3.39.1", "react-router-dom": "^6.3.0", - "react-table": "^7.7.0", - "redoc": "^2.0.0-rc.63", + "react-table": "^7.8.0", + "redoc": "^2.0.0-rc.72", "url-search-params-polyfill": "^8.1.0" - }, - "resolutions": { - "lodash": "^4.17.21", - "css-what": ">=5.0.1", - "trim-newlines": ">=3.0.1", - "y18n": ">=5.0.5", - "postcss": ">=8.2.10", - "glob-parent": ">=5.1.2", - "ssri": ">= 8.0.1", - "jquery": ">=3.5.0", - "browserslist": ">=4.16.5", - "tar": ">=6.1.9", - "node-fetch": ">=2.6.7" } } diff --git a/airflow/www/security.py b/airflow/www/security.py index 42188f06184b4..de6b0d646e8c8 100644 --- a/airflow/www/security.py +++ b/airflow/www/security.py @@ -200,6 +200,16 @@ def __init__(self, appbuilder): view.datamodel = CustomSQLAInterface(view.datamodel.obj) self.perms = None + def _get_root_dag_id(self, dag_id): + if '.' in dag_id: + dm = ( + self.get_session.query(DagModel.dag_id, DagModel.root_dag_id) + .filter(DagModel.dag_id == dag_id) + .first() + ) + return dm.root_dag_id or dm.dag_id + return dag_id + def init_role(self, role_name, perms): """ Initialize the role with actions and related resources. 
@@ -340,7 +350,8 @@ def get_accessible_dag_ids(self, user, user_actions=None, session=None) -> Set[s def can_access_some_dags(self, action: str, dag_id: Optional[str] = None) -> bool: """Checks if user has read or write access to some dags.""" if dag_id and dag_id != '~': - return self.has_access(action, permissions.resource_name_for_dag(dag_id)) + root_dag_id = self._get_root_dag_id(dag_id) + return self.has_access(action, permissions.resource_name_for_dag(root_dag_id)) user = g.user if action == permissions.ACTION_CAN_READ: @@ -349,17 +360,20 @@ def can_access_some_dags(self, action: str, dag_id: Optional[str] = None) -> boo def can_read_dag(self, dag_id, user=None) -> bool: """Determines whether a user has DAG read access.""" - dag_resource_name = permissions.resource_name_for_dag(dag_id) + root_dag_id = self._get_root_dag_id(dag_id) + dag_resource_name = permissions.resource_name_for_dag(root_dag_id) return self.has_access(permissions.ACTION_CAN_READ, dag_resource_name, user=user) def can_edit_dag(self, dag_id, user=None) -> bool: """Determines whether a user has DAG edit access.""" - dag_resource_name = permissions.resource_name_for_dag(dag_id) + root_dag_id = self._get_root_dag_id(dag_id) + dag_resource_name = permissions.resource_name_for_dag(root_dag_id) return self.has_access(permissions.ACTION_CAN_EDIT, dag_resource_name, user=user) def can_delete_dag(self, dag_id, user=None) -> bool: """Determines whether a user has DAG delete access.""" - dag_resource_name = permissions.resource_name_for_dag(dag_id) + root_dag_id = self._get_root_dag_id(dag_id) + dag_resource_name = permissions.resource_name_for_dag(root_dag_id) return self.has_access(permissions.ACTION_CAN_DELETE, dag_resource_name, user=user) def prefixed_dag_id(self, dag_id): @@ -370,7 +384,8 @@ def prefixed_dag_id(self, dag_id): DeprecationWarning, stacklevel=2, ) - return permissions.resource_name_for_dag(dag_id) + root_dag_id = self._get_root_dag_id(dag_id) + return permissions.resource_name_for_dag(root_dag_id) def is_dag_resource(self, resource_name): """Determines if a resource belongs to a DAG or all DAGs.""" @@ -530,7 +545,8 @@ def create_dag_specific_permissions(self) -> None: dags = dagbag.dags.values() for dag in dags: - dag_resource_name = permissions.resource_name_for_dag(dag.dag_id) + root_dag_id = dag.parent_dag.dag_id if dag.parent_dag else dag.dag_id + dag_resource_name = permissions.resource_name_for_dag(root_dag_id) for action_name in self.DAG_ACTIONS: if (action_name, dag_resource_name) not in perms: self._merge_perm(action_name, dag_resource_name) @@ -615,6 +631,7 @@ def _sync_dag_view_permissions(self, dag_id, access_control): :param access_control: a dict where each key is a rolename and each value is a set() of action names (e.g. {'can_read'}) """ + dag_resource_name = permissions.resource_name_for_dag(dag_id) def _get_or_create_dag_permission(action_name: str) -> Optional[Permission]: diff --git a/airflow/www/static/css/main.css b/airflow/www/static/css/main.css index 37bee892bd132..8a7a7eeec77d9 100644 --- a/airflow/www/static/css/main.css +++ b/airflow/www/static/css/main.css @@ -465,6 +465,10 @@ label[for="timezone-other"], z-index: 1070; } +details summary { + display: list-item; +} + .menu-scroll { max-height: 300px; overflow-y: auto; diff --git a/airflow/www/static/js/dag.js b/airflow/www/static/js/dag.js index 0445e0686cb56..2a99a0af1658e 100644 --- a/airflow/www/static/js/dag.js +++ b/airflow/www/static/js/dag.js @@ -17,7 +17,7 @@ * under the License. 
*/ -/* global document, window, Event, $ */ +/* global document, window, CustomEvent, $ */ import { getMetaValue } from './utils'; import { approxTimeFromNow, formatDateTime } from './datetime_utils'; @@ -367,9 +367,7 @@ $('#pause_resume').on('change', function onChange() { $input.removeClass('switch-input--error'); // dispatch an event that React can listen for - const event = new Event('paused'); - event.value = isPaused; - event.key = 'isPaused'; + const event = new CustomEvent('paused', { detail: isPaused }); document.dispatchEvent(event); $.post(url).fail(() => { diff --git a/airflow/www/static/js/dag_dependencies.js b/airflow/www/static/js/dag_dependencies.js index 4e342288efcd9..81ba457522b54 100644 --- a/airflow/www/static/js/dag_dependencies.js +++ b/airflow/www/static/js/dag_dependencies.js @@ -198,8 +198,14 @@ const renderGraph = () => { g.setNode(node.id, node.value); }); + // filter out edges that point to non-existent nodes + const realEdges = edges.filter((e) => { + const edgeNodes = nodes.filter((n) => n.id === e.u || n.id === e.v); + return edgeNodes.length === 2; + }); + // Set edges - edges.forEach((edge) => { + realEdges.forEach((edge) => { g.setEdge(edge.u, edge.v, { curve: d3.curveBasis, arrowheadClass: 'arrowhead', diff --git a/airflow/www/static/js/dags.js b/airflow/www/static/js/dags.js index 56afe8b48ee5f..cdf6557cc96be 100644 --- a/airflow/www/static/js/dags.js +++ b/airflow/www/static/js/dags.js @@ -108,14 +108,16 @@ $.each($('[id^=toggle]'), function toggleId() { $('.typeahead').typeahead({ source(query, callback) { - return $.ajax(autocompleteUrl, + return $.ajax( + autocompleteUrl, { data: { query: encodeURIComponent(query), status: statusFilter, }, success: callback, - }); + }, + ); }, autoSelect: false, afterSelect(value) { diff --git a/airflow/www/static/js/grid/AutoRefresh.jsx b/airflow/www/static/js/grid/AutoRefresh.jsx index d92cf07838045..b7c1c29206095 100644 --- a/airflow/www/static/js/grid/AutoRefresh.jsx +++ b/airflow/www/static/js/grid/AutoRefresh.jsx @@ -33,7 +33,13 @@ const AutoRefresh = () => { return ( - + Auto-refresh { - const scrollRef = useRef(); - const tableRef = useRef(); +interface Props { + isPanelOpen?: boolean; + onPanelToggle: () => void; + hoveredTaskState?: string | null; +} + +const Grid = ({ isPanelOpen = false, onPanelToggle, hoveredTaskState }: Props) => { + const scrollRef = useRef(null); + const tableRef = useRef(null); const { data: { groups, dagRuns } } = useGridData(); const dagRunIds = dagRuns.map((dr) => dr.runId); const openGroupsKey = `${dagId}/open-groups`; - const storedGroups = JSON.parse(localStorage.getItem(openGroupsKey)) || []; + const storedGroups = JSON.parse(localStorage.getItem(openGroupsKey) || '[]'); const [openGroupIds, setOpenGroupIds] = useState(storedGroups); - const onToggleGroups = (groupIds) => { + const onToggleGroups = (groupIds: string[]) => { localStorage.setItem(openGroupsKey, JSON.stringify(groupIds)); setOpenGroupIds(groupIds); }; - const scrollOnResize = new ResizeObserver(() => { - const runsContainer = scrollRef.current; - // Set scroll to top right if it is scrollable - if (runsContainer && runsContainer.scrollWidth > runsContainer.clientWidth) { - runsContainer.scrollBy(tableRef.current.offsetWidth, 0); - } - }); - useEffect(() => { + const scrollOnResize = new ResizeObserver(() => { + const runsContainer = scrollRef.current; + // Set scroll to top right if it is scrollable + if ( + tableRef?.current + && runsContainer + && runsContainer.scrollWidth > runsContainer.clientWidth + ) { + 
runsContainer.scrollBy(tableRef.current.offsetWidth, 0); + } + }); + if (tableRef && tableRef.current) { const table = tableRef.current; @@ -72,45 +84,58 @@ const Grid = ({ isPanelOpen = false, hoveredTaskState }) => { }; } return () => {}; - }, [tableRef, scrollOnResize]); + }, [tableRef, isPanelOpen]); return ( - - - + + + + + + } + transform={!isPanelOpen ? 'rotateZ(180deg)' : undefined} + transitionProperty="none" /> - - - - - - {/* TODO: remove hardcoded values. 665px is roughly the total heade+footer height */} - - {renderTaskRows({ - task: groups, dagRunIds, openGroupIds, onToggleGroups, hoveredTaskState, - })} - -
+ + + + + + + {renderTaskRows({ + task: groups, dagRunIds, openGroupIds, onToggleGroups, hoveredTaskState, + })} + +
+
); }; diff --git a/airflow/www/static/js/grid/LegendRow.test.jsx b/airflow/www/static/js/grid/LegendRow.test.tsx similarity index 71% rename from airflow/www/static/js/grid/LegendRow.test.jsx rename to airflow/www/static/js/grid/LegendRow.test.tsx index 63ea2dfcb4585..7a3ba185932fc 100644 --- a/airflow/www/static/js/grid/LegendRow.test.jsx +++ b/airflow/www/static/js/grid/LegendRow.test.tsx @@ -20,14 +20,16 @@ /* global describe, test, expect, stateColors, jest */ import React from 'react'; -import { render, fireEvent } from '@testing-library/react'; +import { render, fireEvent, waitFor } from '@testing-library/react'; import LegendRow from './LegendRow'; describe('Test LegendRow', () => { test('Render displays correctly the different task states', () => { + const onStatusHover = jest.fn(); + const onStatusLeave = jest.fn(); const { getByText } = render( - , + , ); Object.keys(stateColors).forEach((taskState) => { @@ -41,16 +43,19 @@ describe('Test LegendRow', () => { { state: 'success', expectedSetValue: 'success' }, { state: 'failed', expectedSetValue: 'failed' }, { state: 'no_status', expectedSetValue: null }, - ])('Hovering $state badge should trigger setHoverdTaskState function with $expectedSetValue', + ])( + 'Hovering $state badge should trigger setHoverdTaskState function with $expectedSetValue', async ({ state, expectedSetValue }) => { - const setHoveredTaskState = jest.fn(); + const onStatusHover = jest.fn(); + const onStatusLeave = jest.fn(); const { getByText } = render( - , + , ); const successElement = getByText(state); fireEvent.mouseEnter(successElement); - expect(setHoveredTaskState).toHaveBeenCalledWith(expectedSetValue); + await waitFor(() => expect(onStatusHover).toHaveBeenCalledWith(expectedSetValue)); fireEvent.mouseLeave(successElement); - expect(setHoveredTaskState).toHaveBeenLastCalledWith(); - }); + await waitFor(() => expect(onStatusLeave).toHaveBeenLastCalledWith()); + }, + ); }); diff --git a/airflow/www/static/js/grid/LegendRow.jsx b/airflow/www/static/js/grid/LegendRow.tsx similarity index 70% rename from airflow/www/static/js/grid/LegendRow.jsx rename to airflow/www/static/js/grid/LegendRow.tsx index 3193435701a96..ade94b059250f 100644 --- a/airflow/www/static/js/grid/LegendRow.jsx +++ b/airflow/www/static/js/grid/LegendRow.tsx @@ -26,32 +26,44 @@ import { } from '@chakra-ui/react'; import React from 'react'; +interface LegendProps { + onStatusHover: (status: string | null) => void; + onStatusLeave: () => void; +} + +interface BadgeProps extends LegendProps { + state: string | null; + stateColor: string; + displayValue?: string; +} + const StatusBadge = ({ - state, stateColor, setHoveredTaskState, displayValue, -}) => ( + state, stateColor, onStatusHover, onStatusLeave, displayValue, +}: BadgeProps) => ( setHoveredTaskState(state)} - onMouseLeave={() => setHoveredTaskState()} + onMouseEnter={() => onStatusHover(state)} + onMouseLeave={() => onStatusLeave()} > {displayValue || state } ); -const LegendRow = ({ setHoveredTaskState }) => ( +const LegendRow = ({ onStatusHover, onStatusLeave }: LegendProps) => ( - + { Object.entries(stateColors).map(([state, stateColor]) => ( )) } @@ -60,7 +72,8 @@ const LegendRow = ({ setHoveredTaskState }) => ( displayValue="no_status" state={null} stateColor="white" - setHoveredTaskState={setHoveredTaskState} + onStatusHover={onStatusHover} + onStatusLeave={onStatusLeave} /> diff --git a/airflow/www/static/js/grid/Main.jsx b/airflow/www/static/js/grid/Main.tsx similarity index 56% rename from airflow/www/static/js/grid/Main.jsx 
rename to airflow/www/static/js/grid/Main.tsx index 8668c1379d1f8..bde36c2984fcf 100644 --- a/airflow/www/static/js/grid/Main.jsx +++ b/airflow/www/static/js/grid/Main.tsx @@ -24,30 +24,42 @@ import { Box, Flex, useDisclosure, - Button, Divider, + Spinner, } from '@chakra-ui/react'; +import { isEmpty, debounce } from 'lodash'; import Details from './details'; import useSelection from './utils/useSelection'; import Grid from './Grid'; import FilterBar from './FilterBar'; import LegendRow from './LegendRow'; +import { useGridData } from './api'; +import { hoverDelay } from './utils'; const detailsPanelKey = 'hideDetailsPanel'; const Main = () => { + const { data: { groups }, isLoading } = useGridData(); const isPanelOpen = localStorage.getItem(detailsPanelKey) !== 'true'; const { isOpen, onToggle } = useDisclosure({ defaultIsOpen: isPanelOpen }); const { clearSelection } = useSelection(); - const [hoveredTaskState, setHoveredTaskState] = useState(); + const [hoveredTaskState, setHoveredTaskState] = useState(); - const toggleDetailsPanel = () => { + // Add a debounced delay to not constantly trigger highlighting certain task states + const onStatusHover = debounce((state) => setHoveredTaskState(state), hoverDelay); + + const onStatusLeave = () => { + setHoveredTaskState(undefined); + onStatusHover.cancel(); + }; + + const onPanelToggle = () => { if (!isOpen) { - localStorage.setItem(detailsPanelKey, false); + localStorage.setItem(detailsPanelKey, 'false'); } else { clearSelection(); - localStorage.setItem(detailsPanelKey, true); + localStorage.setItem(detailsPanelKey, 'true'); } onToggle(); }; @@ -55,24 +67,23 @@ const Main = () => { return ( - + - - - - - {isOpen && (
)} - + + {isLoading || isEmpty(groups) + ? () + : ( + <> + + + {isOpen && (
)} + + + )} ); diff --git a/airflow/www/static/js/grid/ResetRoot.jsx b/airflow/www/static/js/grid/ResetRoot.jsx index c69e8dbb84cf7..bd6a89e3b7d20 100644 --- a/airflow/www/static/js/grid/ResetRoot.jsx +++ b/airflow/www/static/js/grid/ResetRoot.jsx @@ -33,7 +33,7 @@ const ResetRoot = () => ( variant="outline" href={url} colorScheme="blue" - mr={2} + mx={2} title="Reset root to show the whole DAG" > Reset Root diff --git a/airflow/www/static/js/grid/ToggleGroups.jsx b/airflow/www/static/js/grid/ToggleGroups.jsx index 3705d67d80241..2f027cd668776 100644 --- a/airflow/www/static/js/grid/ToggleGroups.jsx +++ b/airflow/www/static/js/grid/ToggleGroups.jsx @@ -34,15 +34,15 @@ const getGroupIds = (groups) => { }; const ToggleGroups = ({ groups, openGroupIds, onToggleGroups }) => { + // Don't show button if the DAG has no task groups + const hasGroups = groups.children && groups.children.find((c) => !!c.children); + if (!hasGroups) return null; + const allGroupIds = getGroupIds(groups.children); const isExpandDisabled = allGroupIds.length === openGroupIds.length; const isCollapseDisabled = !openGroupIds.length; - // Don't show button if the DAG has no task groups - const hasGroups = groups.children.find((c) => !!c.children); - if (!hasGroups) return null; - const onExpand = () => { onToggleGroups(allGroupIds); }; diff --git a/airflow/www/static/js/grid/api/index.js b/airflow/www/static/js/grid/api/index.ts similarity index 93% rename from airflow/www/static/js/grid/api/index.js rename to airflow/www/static/js/grid/api/index.ts index 3487ecd6eaff0..0ac8e4e28410d 100644 --- a/airflow/www/static/js/grid/api/index.js +++ b/airflow/www/static/js/grid/api/index.ts @@ -17,7 +17,7 @@ * under the License. */ -import axios from 'axios'; +import axios, { AxiosResponse } from 'axios'; import camelcaseKeys from 'camelcase-keys'; import useTasks from './useTasks'; @@ -35,7 +35,7 @@ import useGridData from './useGridData'; import useMappedInstances from './useMappedInstances'; axios.interceptors.response.use( - (res) => (res.data ? camelcaseKeys(res.data, { deep: true }) : res), + (res: AxiosResponse) => (res.data ? camelcaseKeys(res.data, { deep: true }) : res), ); axios.defaults.headers.common.Accept = 'application/json'; diff --git a/airflow/www/static/js/grid/utils/gridData.test.js b/airflow/www/static/js/grid/api/useGridData.test.js similarity index 96% rename from airflow/www/static/js/grid/utils/gridData.test.js rename to airflow/www/static/js/grid/api/useGridData.test.js index 6bbd8bf8b6a6d..29a7f1ac8a11f 100644 --- a/airflow/www/static/js/grid/utils/gridData.test.js +++ b/airflow/www/static/js/grid/api/useGridData.test.js @@ -19,7 +19,7 @@ /* global describe, test, expect */ -import { areActiveRuns } from './gridData'; +import { areActiveRuns } from './useGridData'; describe('Test areActiveRuns()', () => { test('Correctly detects active runs', () => { diff --git a/airflow/www/static/js/grid/api/useGridData.test.jsx b/airflow/www/static/js/grid/api/useGridData.test.jsx deleted file mode 100644 index 24aece6f5919f..0000000000000 --- a/airflow/www/static/js/grid/api/useGridData.test.jsx +++ /dev/null @@ -1,78 +0,0 @@ -/*! - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* global describe, test, expect, beforeAll */ - -import { renderHook } from '@testing-library/react-hooks'; -import useGridData from './useGridData'; -import { Wrapper } from '../utils/testUtils'; - -const pendingGridData = { - groups: {}, - dag_runs: [ - { - dag_id: 'example_python_operator', - run_id: 'manual__2021-11-08T21:14:17.170046+00:00', - start_date: null, - end_date: null, - state: 'queued', - execution_date: '2021-11-08T21:14:17.170046+00:00', - data_interval_start: '2021-11-08T21:14:17.170046+00:00', - data_interval_end: '2021-11-08T21:14:17.170046+00:00', - run_type: 'manual', - }, - ], -}; - -describe('Test useGridData hook', () => { - beforeAll(() => { - global.autoRefreshInterval = 5; - }); - - test('data is valid camelcase json', () => { - global.gridData = JSON.stringify(pendingGridData); - - const { result } = renderHook(() => useGridData(), { wrapper: Wrapper }); - const { data } = result.current; - - expect(typeof data === 'object').toBe(true); - expect(data.dagRuns).toBeDefined(); - expect(data.dag_runs).toBeUndefined(); - }); - - test('Can handle no gridData', () => { - global.gridData = null; - - const { result } = renderHook(() => useGridData(), { wrapper: Wrapper }); - const { data } = result.current; - - expect(data.dagRuns).toStrictEqual([]); - expect(data.groups).toStrictEqual({}); - }); - - test('Can handle empty gridData object', () => { - global.gridData = {}; - - const { result } = renderHook(() => useGridData(), { wrapper: Wrapper }); - const { data } = result.current; - - expect(data.dagRuns).toStrictEqual([]); - expect(data.groups).toStrictEqual({}); - }); -}); diff --git a/airflow/www/static/js/grid/api/useGridData.js b/airflow/www/static/js/grid/api/useGridData.ts similarity index 63% rename from airflow/www/static/js/grid/api/useGridData.js rename to airflow/www/static/js/grid/api/useGridData.ts index d31712989bcfd..ec12ee6d601dd 100644 --- a/airflow/www/static/js/grid/api/useGridData.js +++ b/airflow/www/static/js/grid/api/useGridData.ts @@ -17,18 +17,16 @@ * under the License. 
*/ -/* global autoRefreshInterval, gridData */ - import { useQuery } from 'react-query'; -import axios from 'axios'; +import axios, { AxiosResponse } from 'axios'; import { getMetaValue } from '../../utils'; import { useAutoRefresh } from '../context/autorefresh'; -import { areActiveRuns, formatData } from '../utils/gridData'; import useErrorToast from '../utils/useErrorToast'; import useFilters, { BASE_DATE_PARAM, NUM_RUNS_PARAM, RUN_STATE_PARAM, RUN_TYPE_PARAM, now, } from '../utils/useFilters'; +import type { Task, DagRun } from '../types'; const DAG_ID_PARAM = 'dag_id'; @@ -37,13 +35,23 @@ const dagId = getMetaValue(DAG_ID_PARAM); const gridDataUrl = getMetaValue('grid_data_url') || ''; const urlRoot = getMetaValue('root'); -const emptyData = { +interface GridData { + dagRuns: DagRun[]; + groups: Task; +} + +const emptyGridData: GridData = { dagRuns: [], - groups: {}, + groups: { + id: null, + label: null, + instances: [], + }, }; +export const areActiveRuns = (runs: DagRun[] = []) => runs.filter((run) => ['queued', 'running', 'scheduled'].includes(run.state)).length > 0; + const useGridData = () => { - const initialData = formatData(gridData, emptyData); const { isRefreshOn, stopRefresh } = useAutoRefresh(); const errorToast = useErrorToast(); const { @@ -52,8 +60,9 @@ const useGridData = () => { }, } = useFilters(); - return useQuery(['gridData', baseDate, numRuns, runType, runState], async () => { - try { + const query = useQuery( + ['gridData', baseDate, numRuns, runType, runState], + async () => { const params = { root: urlRoot || undefined, [DAG_ID_PARAM]: dagId, @@ -62,25 +71,29 @@ const useGridData = () => { [RUN_TYPE_PARAM]: runType, [RUN_STATE_PARAM]: runState, }; - const newData = await axios.get(gridDataUrl, { params }); + const response = await axios.get(gridDataUrl, { params }); // turn off auto refresh if there are no active runs - if (!areActiveRuns(newData.dagRuns)) stopRefresh(); - return newData; - } catch (error) { - stopRefresh(); - errorToast({ - title: 'Auto-refresh Error', - error, - }); - throw (error); - } - }, { - initialData, - placeholderData: emptyData, - // only refetch if the refresh switch is on - refetchInterval: isRefreshOn && autoRefreshInterval * 1000, - keepPreviousData: true, - }); + if (!areActiveRuns(response.dagRuns)) stopRefresh(); + return response; + }, + { + // only refetch if the refresh switch is on + refetchInterval: isRefreshOn && (autoRefreshInterval || 1) * 1000, + keepPreviousData: true, + onError: (error) => { + stopRefresh(); + errorToast({ + title: 'Auto-refresh Error', + error, + }); + throw (error); + }, + }, + ); + return { + ...query, + data: query.data ?? 
emptyGridData, + }; }; export default useGridData; diff --git a/airflow/www/static/js/grid/api/useMappedInstances.js b/airflow/www/static/js/grid/api/useMappedInstances.js index 35eb1decb636e..cc42f2616de05 100644 --- a/airflow/www/static/js/grid/api/useMappedInstances.js +++ b/airflow/www/static/js/grid/api/useMappedInstances.js @@ -40,7 +40,10 @@ export default function useMappedInstances({ }), { keepPreviousData: true, + initialData: { taskInstances: [], totalEntries: 0 }, refetchInterval: isRefreshOn && autoRefreshInterval * 1000, + // staleTime should be similar to the refresh interval + staleTime: autoRefreshInterval * 1000, }, ); } diff --git a/airflow/www/static/js/grid/api/useTasks.test.jsx b/airflow/www/static/js/grid/api/useTasks.test.jsx new file mode 100644 index 0000000000000..d642dae32a55c --- /dev/null +++ b/airflow/www/static/js/grid/api/useTasks.test.jsx @@ -0,0 +1,86 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* global describe, test, expect, beforeEach, afterEach, jest */ + +import React from 'react'; +import { renderHook, waitFor } from '@testing-library/react'; +import { QueryClient, QueryClientProvider } from 'react-query'; +import nock from 'nock'; + +import useTasks from './useTasks'; +import * as metaUtils from '../../utils'; + +const Wrapper = ({ children }) => { + const queryClient = new QueryClient({ + defaultOptions: { + queries: { + retry: 0, + }, + }, + }); + return ( + + {children} + + ); +}; + +const fakeUrl = 'http://fake.api'; + +describe('Test useTasks hook', () => { + let spy; + beforeEach(() => { + spy = jest.spyOn(metaUtils, 'getMetaValue').mockReturnValue(`${fakeUrl}`); + }); + + afterEach(() => { + spy.mockRestore(); + nock.cleanAll(); + }); + + test('initialData works normally', async () => { + const scope = nock(fakeUrl) + .get('/') + .reply(200, { totalEntries: 1, tasks: [{ taskId: 'task_id' }] }); + const { result } = renderHook(() => useTasks(), { wrapper: Wrapper }); + + expect(result.current.data.totalEntries).toBe(0); + expect(result.current.isFetching).toBeTruthy(); + + await waitFor(() => expect(result.current.isFetching).toBeFalsy()); + + expect(result.current.data.totalEntries).toBe(1); + scope.done(); + }); + + test('initialData persists even if there is an error', async () => { + const scope = nock(fakeUrl) + .get('/') + .replyWithError('something awful happened'); + const { result } = renderHook(() => useTasks(), { wrapper: Wrapper }); + + expect(result.current.data.totalEntries).toBe(0); + + await waitFor(() => expect(result.current.isError).toBeTruthy()); + + expect(result.current.data.totalEntries).toBe(0); + scope.done(); + }); +}); diff --git a/airflow/www/static/js/grid/utils/gridData.js b/airflow/www/static/js/grid/api/useTasks.ts similarity index 59% rename 
from airflow/www/static/js/grid/utils/gridData.js rename to airflow/www/static/js/grid/api/useTasks.ts index 95171652b7a7b..68878a78a006a 100644 --- a/airflow/www/static/js/grid/utils/gridData.js +++ b/airflow/www/static/js/grid/api/useTasks.ts @@ -17,18 +17,25 @@ * under the License. */ -import camelcaseKeys from 'camelcase-keys'; +import axios, { AxiosResponse } from 'axios'; +import { useQuery } from 'react-query'; +import { getMetaValue } from '../../utils'; -export const areActiveRuns = (runs = []) => runs.filter((run) => ['queued', 'running', 'scheduled'].includes(run.state)).length > 0; +interface TaskData { + tasks: any[]; + totalEntries: number; +} -export const formatData = (data, emptyData) => { - if (!data || !Object.keys(data).length) { - return emptyData; - } - let formattedData = data; - // Convert to json if needed - if (typeof data === 'string') formattedData = JSON.parse(data); - // change from pascal to camelcase - formattedData = camelcaseKeys(formattedData, { deep: true }); - return formattedData; -}; +export default function useTasks() { + const query = useQuery( + 'tasks', + () => { + const tasksUrl = getMetaValue('tasks_api'); + return axios.get(tasksUrl || ''); + }, + ); + return { + ...query, + data: query.data || { tasks: [], totalEntries: 0 }, + }; +} diff --git a/airflow/www/static/js/grid/components/Clipboard.jsx b/airflow/www/static/js/grid/components/Clipboard.jsx index 794e363fa0b0e..a4e0acca2884c 100644 --- a/airflow/www/static/js/grid/components/Clipboard.jsx +++ b/airflow/www/static/js/grid/components/Clipboard.jsx @@ -60,7 +60,6 @@ export const ClipboardButton = forwardRef( label="Copied" isOpen={hasCopied} isDisabled={!hasCopied} - closeDelay={500} placement="top" portalProps={{ containerRef }} > diff --git a/airflow/www/static/js/grid/components/InstanceTooltip.test.jsx b/airflow/www/static/js/grid/components/InstanceTooltip.test.tsx similarity index 78% rename from airflow/www/static/js/grid/components/InstanceTooltip.test.jsx rename to airflow/www/static/js/grid/components/InstanceTooltip.test.tsx index fc6ab848c958d..71e1147da9453 100644 --- a/airflow/www/static/js/grid/components/InstanceTooltip.test.jsx +++ b/airflow/www/static/js/grid/components/InstanceTooltip.test.tsx @@ -24,19 +24,21 @@ import { render } from '@testing-library/react'; import InstanceTooltip from './InstanceTooltip'; import { Wrapper } from '../utils/testUtils'; +import type { TaskState } from '../types'; const instance = { - startDate: new Date(), - endDate: new Date(), - state: 'success', + startDate: new Date().toISOString(), + endDate: new Date().toISOString(), + state: 'success' as TaskState, runId: 'run', + taskId: 'task', }; describe('Test Task InstanceTooltip', () => { test('Displays a normal task', () => { const { getByText } = render( , { wrapper: Wrapper }, @@ -48,8 +50,10 @@ describe('Test Task InstanceTooltip', () => { test('Displays a mapped task with overall status', () => { const { getByText } = render( , { wrapper: Wrapper }, ); @@ -63,12 +67,20 @@ describe('Test Task InstanceTooltip', () => { const { getByText, queryByText } = render( { +}: Props) => { + if (!group) return null; const isGroup = !!group.children; - const { isMapped } = group; - const summary = []; + const summary: React.ReactNode[] = []; + + const isMapped = group?.isMapped; const numMap = finalStatesMap(); - if (isGroup) { + let numMapped = 0; + if (isGroup && group.children) { group.children.forEach((child) => { const taskInstance = child.instances.find((ti) => ti.runId === runId); if 
(taskInstance) { const stateKey = taskInstance.state == null ? 'no_status' : taskInstance.state; - if (numMap.has(stateKey)) numMap.set(stateKey, numMap.get(stateKey) + 1); + if (numMap.has(stateKey)) numMap.set(stateKey, (numMap.get(stateKey) || 0) + 1); } }); } else if (isMapped && mappedStates) { - mappedStates.forEach((s) => { - const stateKey = s || 'no_status'; - if (numMap.has(stateKey)) numMap.set(stateKey, numMap.get(stateKey) + 1); + Object.keys(mappedStates).forEach((stateKey) => { + const num = mappedStates[stateKey]; + numMapped += num; + numMap.set(stateKey || 'no_status', num); }); } @@ -68,12 +78,12 @@ const InstanceTooltip = ({ {group.tooltip && ( {group.tooltip} )} - {isMapped && !!mappedStates.length && ( + {isMapped && numMapped > 0 && ( - {mappedStates.length} + {numMapped} {' '} mapped task - {mappedStates.length > 1 && 's'} + {numMapped > 1 && 's'} )} @@ -86,7 +96,7 @@ const InstanceTooltip = ({ Started: {' '} - Duration: diff --git a/airflow/www/static/js/grid/components/StatusBox.jsx b/airflow/www/static/js/grid/components/StatusBox.tsx similarity index 68% rename from airflow/www/static/js/grid/components/StatusBox.jsx rename to airflow/www/static/js/grid/components/StatusBox.tsx index 316529930f3b9..dd8dfabfb14b1 100644 --- a/airflow/www/static/js/grid/components/StatusBox.jsx +++ b/airflow/www/static/js/grid/components/StatusBox.tsx @@ -17,36 +17,49 @@ * under the License. */ -/* global stateColors */ - import React from 'react'; import { isEqual } from 'lodash'; import { Box, - Tooltip, useTheme, + BoxProps, } from '@chakra-ui/react'; +import Tooltip from './Tooltip'; import InstanceTooltip from './InstanceTooltip'; import { useContainerRef } from '../context/containerRef'; +import type { Task, TaskInstance, TaskState } from '../types'; +import type { SelectionProps } from '../utils/useSelection'; +import { hoverDelay } from '../utils'; export const boxSize = 10; export const boxSizePx = `${boxSize}px`; -export const SimpleStatus = ({ state, ...rest }) => ( +interface SimpleStatusProps extends BoxProps { + state: TaskState; +} + +export const SimpleStatus = ({ state, ...rest }: SimpleStatusProps) => ( ); +interface Props { + group: Task; + instance: TaskInstance; + onSelect: (selection: SelectionProps) => void; + isActive: boolean; +} + const StatusBox = ({ group, instance, onSelect, isActive, -}) => { +}: Props) => { const containerRef = useContainerRef(); const { runId, taskId } = instance; const { colors } = useTheme(); @@ -54,15 +67,19 @@ const StatusBox = ({ // Fetch the corresponding column element and set its background color when hovering const onMouseEnter = () => { - [...containerRef.current.getElementsByClassName(`js-${runId}`)] - .forEach((e) => { - // Don't apply hover if it is already selected - if (e.getAttribute('data-selected') === 'false') e.style.backgroundColor = hoverBlue; - }); + if (containerRef && containerRef.current) { + ([...containerRef.current.getElementsByClassName(`js-${runId}`)] as HTMLElement[]) + .forEach((e) => { + // Don't apply hover if it is already selected + if (e.getAttribute('data-selected') === 'false') e.style.backgroundColor = hoverBlue; + }); + } }; const onMouseLeave = () => { - [...containerRef.current.getElementsByClassName(`js-${runId}`)] - .forEach((e) => { e.style.backgroundColor = null; }); + if (containerRef && containerRef.current) { + ([...containerRef.current.getElementsByClassName(`js-${runId}`)] as HTMLElement[]) + .forEach((e) => { e.style.backgroundColor = ''; }); + } }; const onClick = () => { @@ 
-76,7 +93,7 @@ const StatusBox = ({ portalProps={{ containerRef }} hasArrow placement="top" - openDelay={400} + openDelay={hoverDelay} > ( isEqual(prevProps.group, nextProps.group) && isEqual(prevProps.instance, nextProps.instance) diff --git a/airflow/www/static/js/grid/components/Table.jsx b/airflow/www/static/js/grid/components/Table.jsx index 570becf12aa9e..57bef0a911e5d 100644 --- a/airflow/www/static/js/grid/components/Table.jsx +++ b/airflow/www/static/js/grid/components/Table.jsx @@ -79,6 +79,7 @@ const Table = ({ hooks.visibleColumns.push((cols) => [ { id: 'selection', + // eslint-disable-next-line react/no-unstable-nested-components Cell: ({ row }) => (
diff --git a/airflow/www/static/js/grid/components/TaskName.jsx b/airflow/www/static/js/grid/components/TaskName.jsx index 316dd89e956bd..b46c6f03808a2 100644 --- a/airflow/www/static/js/grid/components/TaskName.jsx +++ b/airflow/www/static/js/grid/components/TaskName.jsx @@ -40,7 +40,7 @@ const TaskName = ({ {label} {isMapped && ( diff --git a/airflow/www/static/js/grid/components/TaskName.test.jsx b/airflow/www/static/js/grid/components/TaskName.test.jsx index 9a403735ba885..a9760da303592 100644 --- a/airflow/www/static/js/grid/components/TaskName.test.jsx +++ b/airflow/www/static/js/grid/components/TaskName.test.jsx @@ -27,25 +27,19 @@ import { ChakraWrapper } from '../utils/testUtils'; describe('Test TaskName', () => { test('Displays a normal task name', () => { - const { getByText } = render( - , { wrapper: ChakraWrapper }, - ); + const { getByText } = render(, { wrapper: ChakraWrapper }); expect(getByText('test')).toBeDefined(); }); test('Displays a mapped task name', () => { - const { getByText } = render( - , { wrapper: ChakraWrapper }, - ); + const { getByText } = render(, { wrapper: ChakraWrapper }); expect(getByText('test [ ]')).toBeDefined(); }); test('Displays a group task name', () => { - const { getByText, getByTestId } = render( - , { wrapper: ChakraWrapper }, - ); + const { getByText, getByTestId } = render(, { wrapper: ChakraWrapper }); expect(getByText('test')).toBeDefined(); expect(getByTestId('closed-group')).toBeDefined(); diff --git a/airflow/www/static/js/grid/components/Time.test.jsx b/airflow/www/static/js/grid/components/Time.test.tsx similarity index 90% rename from airflow/www/static/js/grid/components/Time.test.jsx rename to airflow/www/static/js/grid/components/Time.test.tsx index c3044c59fc917..a7de6f5f328b5 100644 --- a/airflow/www/static/js/grid/components/Time.test.jsx +++ b/airflow/www/static/js/grid/components/Time.test.tsx @@ -17,7 +17,7 @@ * under the License. */ -/* global describe, test, expect, document, Event */ +/* global describe, test, expect, document, CustomEvent */ import React from 'react'; import { @@ -33,7 +33,7 @@ describe('Test Time and TimezoneProvider', () => { test('Displays a UTC time correctly', () => { const now = new Date(); const { getByText } = render( -
'''
+            == rendered
+        )
 
     def test_wrapped_markdown_with_table(self):
         rendered = wrapped_markdown(
-            """| Job | Duration |
-            | ----------- | ----------- |
-            | ETL | 14m |"""
+            """
+| Job | Duration |
+| ----------- | ----------- |
+| ETL | 14m |
+"""
         )
 
         assert (
-            '<div class="rich_doc" ><table>\n<thead>\n<tr>\n<th>Job</th>\n'
-            '<th>Duration</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>ETL</td>\n'
-            '<td>14m</td>\n</tr>\n</tbody>\n</table></div>'
-        ) == rendered
+            '''<div class="rich_doc" ><table>
+<thead>
+<tr>
+<th>Job</th>
+<th>Duration</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>ETL</td>
+<td>14m</td>
+</tr>
+</tbody>
+</table>
+</div>'''
+            == rendered
+        )
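Note: the expected string above is ordinary python-markdown pipe-table output wrapped in the rich_doc div that wrapped_markdown() adds around the converted HTML. A minimal sketch reproduces that shape; it assumes the 'tables' extension and is not Airflow's actual implementation, whose extension list may differ.

# Illustrative sketch only -- not Airflow's wrapped_markdown() implementation.
import markdown

TABLE_MD = "| Job | Duration |\n| ----------- | ----------- |\n| ETL | 14m |\n"

def render_rich_doc(source: str) -> str:
    # 'tables' is the python-markdown extension that emits <table>/<thead>/<tbody>.
    html = markdown.markdown(source, extensions=["tables"])
    return f'<div class="rich_doc" >{html}\n</div>'

print(render_rich_doc(TABLE_MD))  # prints roughly the HTML asserted above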

 
     def test_wrapped_markdown_with_indented_lines(self):
         rendered = wrapped_markdown(
@@ -217,7 +242,11 @@
             """
         )
 
-        assert '<div class="rich_doc" ><h1>header</h1>\n<p>1st line\n2nd line</p></div>' == rendered
+        assert (
+            '''<div class="rich_doc" ><h1>header</h1>\n<p>1st line\n2nd line</p>
+</div>'''
+            == rendered
+        )
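Note: the reworked inputs start with a bare newline so that a common-indent strip (textwrap.dedent-style, which the markdown wrapper is assumed to apply) sees the same leading whitespace on every content line and the indented text is converted as plain Markdown rather than a 4-space code block. A small sketch of that behaviour:

# Sketch of the dedent behaviour assumed above; textwrap.dedent only strips whitespace
# that is common to every non-blank line.
import textwrap

raw = """
    # header
    1st line
    2nd line
"""
assert textwrap.dedent(raw) == "\n# header\n1st line\n2nd line\n"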

 
     def test_wrapped_markdown_with_raw_code_block(self):
         rendered = wrapped_markdown(
@@ -235,10 +264,12 @@
         )
 
         assert (
-            '<div class="rich_doc" ><pre><code>Markdown code block</code></pre>\n'
-            '<p>Inline <code>code</code> works well.</p>\n'
-            '<pre><code>Code block\ndoes not\nrespect\nnewlines\n</code></pre></div>'
-        ) == rendered
+            '''<div class="rich_doc" ><pre><code>Markdown code block</code></pre>
+<p>Inline <code>code</code> works well.</p>
+<pre><code>Code block\ndoes not\nrespect\nnewlines\n</code></pre>
+</div>'''
+            == rendered
+        )
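Note: the two code styles exercised above behave differently in Markdown: backticks yield an inline <code> element inside a paragraph, while a 4-space-indented block becomes a literal <pre><code> block that preserves its newlines. A quick sketch; the equalities should hold with stock python-markdown and match the substrings asserted above.

# Quick sketch, stock python-markdown assumed.
import markdown

assert markdown.markdown("Inline `code` works well.") == "<p>Inline <code>code</code> works well.</p>"

block = "    Code block\n    does not\n    respect\n    newlines"
assert markdown.markdown(block) == "<pre><code>Code block\ndoes not\nrespect\nnewlines\n</code></pre>"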

 
     def test_wrapped_markdown_with_nested_list(self):
         rendered = wrapped_markdown(
@@ -251,6 +282,49 @@
         )
 
         assert (
-            '<div class="rich_doc" ><h3>Docstring with a code block</h3>\n'
-            '<ul>\n<li>And\n<ul>\n<li>A nested list</li>\n</ul>\n</li>\n</ul></div>'
-        ) == rendered
+            '''<div class="rich_doc" ><h3>Docstring with a code block</h3>
+<ul>
+<li>And
+<ul>
+<li>A nested list</li>
+</ul>
+</li>
+</ul>
+</div>'''
+            == rendered
+        )
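Note: the collapsible-section test that follows leans on a Markdown behaviour worth spelling out: a raw block-level HTML tag such as <details> is passed through untouched only up to the first blank line, after which the remaining lines are converted as normal Markdown, so the heading and list inside the element still become <h2> and <ol>/<li>. A hedged sketch with stock python-markdown (Airflow may enable additional extensions):

# Sketch only; stock python-markdown assumed.
import markdown

collapsible = (
    "<details>\n"
    "  <summary>Click to expand!</summary>\n"
    "\n"  # the blank line is what re-enables Markdown processing
    "  ## Heading\n"
    "  1. A numbered\n"
    "  2. list\n"
    "</details>\n"
)
print(markdown.markdown(collapsible))
# <details> and <summary> pass through verbatim; '## Heading' and the numbered items
# come out as <h2> and <ol>/<li>, matching the shape asserted in the next test.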
+
+    def test_wrapped_markdown_with_collapsible_section(self):
+        rendered = wrapped_markdown(
+            """
+# A collapsible section with markdown
+<details>
+  <summary>Click to expand!</summary>
+
+  ## Heading
+  1. A numbered
+  2. list
+     * With some
+     * Sub bullets
+</details>
+            """
+        )
+
+        assert (
+            '''<div class="rich_doc" ><h1>A collapsible section with markdown</h1>
+<details>
+  <summary>Click to expand!</summary>
+<h2>Heading</h2>
+<ol>
+<li>A numbered</li>
+<li>list
+<ul>
+<li>With some</li>
+<li>Sub bullets</li>
+</ul>
+</li>
+</ol>
+</details>
''' + == rendered + ) diff --git a/tests/www/views/__init__.py b/tests/www/views/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/tests/www/views/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py index 887bd4898a0a6..fa79e145cba6c 100644 --- a/tests/www/views/test_views.py +++ b/tests/www/views/test_views.py @@ -375,52 +375,55 @@ def test_get_task_stats_from_query(): assert data == expected_data +INVALID_DATETIME_RESPONSE = "Invalid datetime: 'invalid'" + + @pytest.mark.parametrize( "url, content", [ ( '/rendered-templates?execution_date=invalid', - "Invalid datetime: 'invalid'", + INVALID_DATETIME_RESPONSE, ), ( '/log?execution_date=invalid', - "Invalid datetime: 'invalid'", + INVALID_DATETIME_RESPONSE, ), ( '/redirect_to_external_log?execution_date=invalid', - "Invalid datetime: 'invalid'", + INVALID_DATETIME_RESPONSE, ), ( '/task?execution_date=invalid', - "Invalid datetime: 'invalid'", + INVALID_DATETIME_RESPONSE, ), ( 'dags/example_bash_operator/graph?execution_date=invalid', - "Invalid datetime: 'invalid'", + INVALID_DATETIME_RESPONSE, ), ( 'dags/example_bash_operator/graph?execution_date=invalid', - "Invalid datetime: 'invalid'", + INVALID_DATETIME_RESPONSE, ), ( 'dags/example_bash_operator/duration?base_date=invalid', - "Invalid datetime: 'invalid'", + INVALID_DATETIME_RESPONSE, ), ( 'dags/example_bash_operator/tries?base_date=invalid', - "Invalid datetime: 'invalid'", + INVALID_DATETIME_RESPONSE, ), ( 'dags/example_bash_operator/landing-times?base_date=invalid', - "Invalid datetime: 'invalid'", + INVALID_DATETIME_RESPONSE, ), ( 'dags/example_bash_operator/gantt?execution_date=invalid', - "Invalid datetime: 'invalid'", + INVALID_DATETIME_RESPONSE, ), ( 'extra_links?execution_date=invalid', - "Invalid datetime: 'invalid'", + INVALID_DATETIME_RESPONSE, ), ], ) diff --git a/tests/www/views/test_views_acl.py b/tests/www/views/test_views_acl.py index d96098a5c6d5d..84003bf366017 100644 --- a/tests/www/views/test_views_acl.py +++ b/tests/www/views/test_views_acl.py @@ -563,7 +563,7 @@ def client_dags_tis_logs(acl_app, user_dags_tis_logs): TRIES_URL = "tries?days=30&dag_id=example_bash_operator" LANDING_TIMES_URL = "landing_times?days=30&dag_id=example_bash_operator" GANTT_URL = "gantt?dag_id=example_bash_operator" -TREE_URL = "tree?dag_id=example_bash_operator" +GRID_DATA_URL = "object/grid_data?dag_id=example_bash_operator" LOG_URL = ( f"log?task_id=runme_0&dag_id=example_bash_operator&" f"execution_date={urllib.parse.quote_plus(str(DEFAULT_DATE))}" @@ -581,8 +581,8 @@ def client_dags_tis_logs(acl_app, user_dags_tis_logs): ("client_all_dags_tis", TRIES_URL, "example_bash_operator"), ("client_all_dags_tis", 
LANDING_TIMES_URL, "example_bash_operator"), ("client_all_dags_tis", GANTT_URL, "example_bash_operator"), - ("client_dags_tis_logs", TREE_URL, "runme_1"), - ("viewer_client", TREE_URL, "runme_1"), + ("client_dags_tis_logs", GRID_DATA_URL, "runme_1"), + ("viewer_client", GRID_DATA_URL, "runme_1"), ("client_dags_tis_logs", LOG_URL, "Log by attempts"), ("user_client", LOG_URL, "Log by attempts"), ], @@ -595,8 +595,8 @@ def client_dags_tis_logs(acl_app, user_dags_tis_logs): "tries", "landing-times", "gantt", - "tree-for-readonly-role", - "tree-for-viewer", + "grid-data-for-readonly-role", + "grid-data-for-viewer", "log", "log-for-user", ], diff --git a/tests/www/views/test_views_decorators.py b/tests/www/views/test_views_decorators.py index 0e4fc12857a8f..1de80c1214a28 100644 --- a/tests/www/views/test_views_decorators.py +++ b/tests/www/views/test_views_decorators.py @@ -213,9 +213,9 @@ def test_action_has_dag_edit_access(create_task_instance, class_type, no_instanc else: test_items = tis if class_type == TaskInstance else [ti.get_dagrun() for ti in tis] test_items = test_items[0] if len(test_items) == 1 else test_items - - with app.create_app(testing=True).app_context(): - with mock.patch("airflow.www.views.current_app.appbuilder.sm.can_edit_dag") as mocked_can_edit: + application = app.create_app(testing=True) + with application.app_context(): + with mock.patch.object(application.appbuilder.sm, "can_edit_dag") as mocked_can_edit: mocked_can_edit.return_value = True assert not isinstance(test_items, list) or len(test_items) == no_instances assert some_view_action_which_requires_dag_edit_access(None, test_items) is True diff --git a/tests/www/views/test_views_grid.py b/tests/www/views/test_views_grid.py new file mode 100644 index 0000000000000..a64a0452bb8a0 --- /dev/null +++ b/tests/www/views/test_views_grid.py @@ -0,0 +1,260 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
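Note on the fixtures defined below: the dag runs are created inside freezegun's freeze_time(), so every timestamp derived from "now" is pinned to CURRENT_TIME and the /object/grid_data payloads asserted later are deterministic (for example, run_1's end_date serializes as 2021-09-07T00:00:00+00:00). A minimal sketch of that mechanism, using only APIs already imported by this module:

# Minimal sketch of the time freezing used by the fixtures in this module.
import datetime

import freezegun
import pendulum

CURRENT_TIME = pendulum.DateTime(2021, 9, 7)

with freezegun.freeze_time(CURRENT_TIME):
    # Inside the block the wall clock is pinned, so any start_date/end_date the model
    # code derives from "now" equals CURRENT_TIME.
    assert datetime.datetime.utcnow() == datetime.datetime(2021, 9, 7)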
+ +from typing import List + +import freezegun +import pendulum +import pytest + +from airflow.models import DagBag +from airflow.models.dagrun import DagRun +from airflow.operators.empty import EmptyOperator +from airflow.utils.state import DagRunState, TaskInstanceState +from airflow.utils.task_group import TaskGroup +from airflow.utils.types import DagRunType +from airflow.www.views import dag_to_grid +from tests.test_utils.asserts import assert_queries_count +from tests.test_utils.db import clear_db_runs +from tests.test_utils.mock_operators import MockOperator + +DAG_ID = 'test' +CURRENT_TIME = pendulum.DateTime(2021, 9, 7) + + +@pytest.fixture(autouse=True, scope="module") +def examples_dag_bag(): + # Speed up: We don't want example dags for this module + return DagBag(include_examples=False, read_dags_from_db=True) + + +@pytest.fixture(autouse=True) +def clean(): + clear_db_runs() + yield + clear_db_runs() + + +@pytest.fixture +def dag_without_runs(dag_maker, session, app, monkeypatch): + with monkeypatch.context() as m: + # Remove global operator links for this test + m.setattr('airflow.plugins_manager.global_operator_extra_links', []) + m.setattr('airflow.plugins_manager.operator_extra_links', []) + m.setattr('airflow.plugins_manager.registered_operator_link_classes', {}) + + with dag_maker(dag_id=DAG_ID, serialized=True, session=session): + EmptyOperator(task_id="task1") + with TaskGroup(group_id='group'): + MockOperator.partial(task_id='mapped').expand(arg1=['a', 'b', 'c', 'd']) + + m.setattr(app, 'dag_bag', dag_maker.dagbag) + yield dag_maker + + +@pytest.fixture +def dag_with_runs(dag_without_runs): + with freezegun.freeze_time(CURRENT_TIME): + date = dag_without_runs.dag.start_date + run_1 = dag_without_runs.create_dagrun( + run_id='run_1', state=DagRunState.SUCCESS, run_type=DagRunType.SCHEDULED, execution_date=date + ) + run_2 = dag_without_runs.create_dagrun( + run_id='run_2', + run_type=DagRunType.SCHEDULED, + execution_date=dag_without_runs.dag.next_dagrun_info(date).logical_date, + ) + + yield run_1, run_2 + + +def test_no_runs(admin_client, dag_without_runs): + resp = admin_client.get(f'/object/grid_data?dag_id={DAG_ID}', follow_redirects=True) + assert resp.status_code == 200, resp.json + assert resp.json == { + 'dag_runs': [], + 'groups': { + 'children': [ + { + 'extra_links': [], + 'id': 'task1', + 'instances': [], + 'is_mapped': False, + 'label': 'task1', + }, + { + 'children': [ + { + 'extra_links': [], + 'id': 'group.mapped', + 'instances': [], + 'is_mapped': True, + 'label': 'mapped', + } + ], + 'id': 'group', + 'instances': [], + 'label': 'group', + 'tooltip': '', + }, + ], + 'id': None, + 'instances': [], + 'label': None, + }, + } + + +def test_one_run(admin_client, dag_with_runs: List[DagRun], session): + """ + Test a DAG with complex interaction of states: + - One run successful + - One run partly success, partly running + - One TI not yet finished + """ + run1, run2 = dag_with_runs + + for ti in run1.task_instances: + ti.state = TaskInstanceState.SUCCESS + for ti in sorted(run2.task_instances, key=lambda ti: (ti.task_id, ti.map_index)): + if ti.task_id == "task1": + ti.state = TaskInstanceState.SUCCESS + elif ti.task_id == "group.mapped": + if ti.map_index == 0: + ti.state = TaskInstanceState.SUCCESS + ti.start_date = pendulum.DateTime(2021, 7, 1, 1, 0, 0, tzinfo=pendulum.UTC) + ti.end_date = pendulum.DateTime(2021, 7, 1, 1, 2, 3, tzinfo=pendulum.UTC) + elif ti.map_index == 1: + ti.state = TaskInstanceState.RUNNING + ti.start_date = pendulum.DateTime(2021, 
7, 1, 2, 3, 4, tzinfo=pendulum.UTC) + ti.end_date = None + + session.flush() + + resp = admin_client.get(f'/object/grid_data?dag_id={DAG_ID}', follow_redirects=True) + assert resp.status_code == 200, resp.json + assert resp.json == { + 'dag_runs': [ + { + 'data_interval_end': '2016-01-02T00:00:00+00:00', + 'data_interval_start': '2016-01-01T00:00:00+00:00', + 'end_date': '2021-09-07T00:00:00+00:00', + 'execution_date': '2016-01-01T00:00:00+00:00', + 'last_scheduling_decision': None, + 'run_id': 'run_1', + 'run_type': 'scheduled', + 'start_date': '2016-01-01T00:00:00+00:00', + 'state': 'success', + }, + { + 'data_interval_end': '2016-01-03T00:00:00+00:00', + 'data_interval_start': '2016-01-02T00:00:00+00:00', + 'end_date': None, + 'execution_date': '2016-01-02T00:00:00+00:00', + 'last_scheduling_decision': None, + 'run_id': 'run_2', + 'run_type': 'scheduled', + 'start_date': '2016-01-01T00:00:00+00:00', + 'state': 'running', + }, + ], + 'groups': { + 'children': [ + { + 'extra_links': [], + 'id': 'task1', + 'instances': [ + { + 'run_id': 'run_1', + 'start_date': None, + 'end_date': None, + 'state': 'success', + 'task_id': 'task1', + 'try_number': 1, + }, + { + 'run_id': 'run_2', + 'start_date': None, + 'end_date': None, + 'state': 'success', + 'task_id': 'task1', + 'try_number': 1, + }, + ], + 'is_mapped': False, + 'label': 'task1', + }, + { + 'children': [ + { + 'extra_links': [], + 'id': 'group.mapped', + 'instances': [ + { + 'run_id': 'run_1', + 'mapped_states': {'success': 4}, + 'start_date': None, + 'end_date': None, + 'state': 'success', + 'task_id': 'group.mapped', + }, + { + 'run_id': 'run_2', + 'mapped_states': {'no_status': 2, 'running': 1, 'success': 1}, + 'start_date': '2021-07-01T01:00:00+00:00', + 'end_date': '2021-07-01T01:02:03+00:00', + 'state': 'running', + 'task_id': 'group.mapped', + }, + ], + 'is_mapped': True, + 'label': 'mapped', + }, + ], + 'id': 'group', + 'instances': [ + { + 'end_date': None, + 'run_id': 'run_1', + 'start_date': None, + 'state': 'success', + 'task_id': 'group', + }, + { + 'run_id': 'run_2', + 'start_date': '2021-07-01T01:00:00+00:00', + 'end_date': '2021-07-01T01:02:03+00:00', + 'state': 'running', + 'task_id': 'group', + }, + ], + 'label': 'group', + 'tooltip': '', + }, + ], + 'id': None, + 'instances': [], + 'label': None, + }, + } + + +def test_query_count(dag_with_runs, session): + run1, run2 = dag_with_runs + with assert_queries_count(1): + dag_to_grid(run1.dag, (run1, run2), session) diff --git a/tests/www/views/test_views_log.py b/tests/www/views/test_views_log.py index f697cd3772c28..fd136351cf1ad 100644 --- a/tests/www/views/test_views_log.py +++ b/tests/www/views/test_views_log.py @@ -85,9 +85,6 @@ def factory(): logging_config['handlers']['task']['base_log_folder'] = str( pathlib.Path(__file__, "..", "..", "test_logs").resolve(), ) - logging_config['handlers']['task'][ - 'filename_template' - ] = '{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts | replace(":", ".") }}/{{ try_number }}.log' with tempfile.TemporaryDirectory() as settings_dir: local_settings = pathlib.Path(settings_dir, "airflow_local_settings.py") @@ -464,7 +461,7 @@ def test_redirect_to_external_log_with_local_log_handler(log_admin_client, task_ ) response = log_admin_client.get(url) assert 302 == response.status_code - assert 'http://localhost/home' == response.headers['Location'] + assert '/home' == response.headers['Location'] class _ExternalHandler(ExternalLoggingMixin): diff --git a/tests/www/views/test_views_mount.py b/tests/www/views/test_views_mount.py index 
a9fb8746657df..3f504e9b0f168 100644 --- a/tests/www/views/test_views_mount.py +++ b/tests/www/views/test_views_mount.py @@ -36,7 +36,7 @@ def factory(): @pytest.fixture() def client(app): - return werkzeug.test.Client(app, werkzeug.wrappers.BaseResponse) + return werkzeug.test.Client(app, werkzeug.wrappers.response.Response) def test_mount(client): @@ -54,4 +54,4 @@ def test_not_found(client): def test_index(client): resp = client.get('/test/') assert resp.status_code == 302 - assert resp.headers['Location'] == 'http://localhost/test/home' + assert resp.headers['Location'] == '/test/home' diff --git a/tests/www/views/test_views_tasks.py b/tests/www/views/test_views_tasks.py index 3428ac848aecd..d99a8fd5cacdc 100644 --- a/tests/www/views/test_views_tasks.py +++ b/tests/www/views/test_views_tasks.py @@ -165,24 +165,14 @@ def client_ti_without_dag_edit(app): id='graph', ), pytest.param( - 'tree?dag_id=example_bash_operator', + 'object/grid_data?dag_id=example_bash_operator', ['runme_1'], - id='tree', + id='grid-data', ), pytest.param( - 'dags/example_bash_operator/grid', - ['runme_1'], - id='grid', - ), - pytest.param( - 'tree?dag_id=example_subdag_operator.section-1', - ['section-1-task-1'], - id="tree-subdag-url-param", - ), - pytest.param( - 'dags/example_subdag_operator.section-1/grid', + 'object/grid_data?dag_id=example_subdag_operator.section-1', ['section-1-task-1'], - id="grid-subdag", + id="grid-data-subdag", ), pytest.param( 'duration?days=30&dag_id=example_bash_operator',