From d7bd72f494e7debec11672eeddf2e6ba5ef75fac Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sat, 25 Jun 2022 09:46:13 +0200 Subject: [PATCH] Convert selective checks to Breeze Python (#24610) Instead of bash-based, complex logic script to perform PR selective checks we now integrated the whole logic into Breeze Python code. It is now much simplified, when it comes to algorithm. We've implemented simple rule-based decision tree. The rules describing the decision tree are now are now much easier to reason about and they correspond one-to-one with the rules that are implemented in the code in rather straightforward way. The code is much simpler and diagnostics of the selective checks has also been vastly improved: * The rule engine displays status of applying each rule and explains (with yellow warning message what decision was made and why. Informative messages are printed showing the resulting output * List of files impacting the decision are also displayed * The names of "ci file group" and "test type" were aligned * Unit tests covering wide range of cases are added. Each test describes what is the case they demonstrate * `breeze selective-checks` command that is used in CI can also be used locally by just providing commit-ish reference of the commit to check. This way you can very easily debug problems and fix them Fixes: #19971 --- .github/workflows/build-images.yml | 20 +- .github/workflows/ci.yml | 18 +- .github/workflows/codeql-analysis.yml | 20 +- BREEZE.rst | 36 +- SELECTIVE_CHECKS.md | 144 ---- dev/breeze/README.md | 2 +- dev/breeze/SELECTIVE_CHECKS.md | 98 +++ dev/breeze/setup.cfg | 1 + .../airflow_breeze/commands/ci_commands.py | 237 ++++++ .../configuration_and_maintenance_commands.py | 64 +- .../commands/release_management_commands.py | 35 - .../commands/testing_commands.py | 4 +- .../airflow_breeze/configure_rich_click.py | 5 +- .../src/airflow_breeze/global_constants.py | 75 +- .../airflow_breeze/utils/selective_checks.py | 480 +++++++++++ dev/breeze/tests/test_selective_checks.py | 464 +++++++++++ images/breeze/output-commands-hash.txt | 2 +- images/breeze/output-commands.svg | 380 ++++----- images/breeze/output-selective-check.svg | 132 +++ images/breeze/output-tests.svg | 160 ++-- scripts/ci/selective_ci_checks.sh | 768 ------------------ .../ci_run_single_airflow_test_in_docker.sh | 6 +- 22 files changed, 1814 insertions(+), 1337 deletions(-) delete mode 100644 SELECTIVE_CHECKS.md create mode 100644 dev/breeze/SELECTIVE_CHECKS.md create mode 100644 dev/breeze/src/airflow_breeze/commands/ci_commands.py create mode 100644 dev/breeze/src/airflow_breeze/utils/selective_checks.py create mode 100644 dev/breeze/tests/test_selective_checks.py create mode 100644 images/breeze/output-selective-check.svg delete mode 100755 scripts/ci/selective_ci_checks.sh diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index 541c72165997b..e28a9d7619c5a 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -109,18 +109,20 @@ jobs: with: persist-credentials: false submodules: recursive + - name: "Setup python" + uses: actions/setup-python@v2 + with: + # We do not have output from selective checks yet, so we need to hardcode python + python-version: 3.7 + cache: 'pip' + cache-dependency-path: ./dev/breeze/setup* + - run: ./scripts/ci/install_breeze.sh - name: Selective checks id: selective-checks env: - PR_LABELS: ${{ steps.get-latest-pr-labels.outputs.pullRequestLabels }} - run: | - if [[ ${GITHUB_EVENT_NAME} == 
"pull_request_target" ]]; then - # Run selective checks - ./scripts/ci/selective_ci_checks.sh "${TARGET_COMMIT_SHA}" - else - # Run all checks - ./scripts/ci/selective_ci_checks.sh - fi + PR_LABELS: "$${{ steps.get-latest-pr-labels.outputs.pullRequestLabels }}" + COMMIT_REF: "${{ github.sha }}" + run: breeze selective-check - name: Compute dynamic outputs id: dynamic-outputs run: | diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0cea9d687f50c..6545423482912 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -192,18 +192,20 @@ jobs: fetch-depth: 2 persist-credentials: false if: github.event_name == 'pull_request' + - name: "Setup python" + uses: actions/setup-python@v2 + with: + # We do not have output from selective checks yet, so we need to hardcode python + python-version: 3.7 + cache: 'pip' + cache-dependency-path: ./dev/breeze/setup* + - run: ./scripts/ci/install_breeze.sh - name: Selective checks id: selective-checks env: PR_LABELS: "${{ steps.source-run-info.outputs.pullRequestLabels }}" - run: | - if [[ ${GITHUB_EVENT_NAME} == "pull_request" ]]; then - # Run selective checks - ./scripts/ci/selective_ci_checks.sh "${GITHUB_SHA}" - else - # Run all checks - ./scripts/ci/selective_ci_checks.sh - fi + COMMIT_REF: "${{ github.sha }}" + run: breeze selective-check # Avoid having to specify the runs-on logic every time. We use the custom # env var AIRFLOW_SELF_HOSTED_RUNNER set only on our runners, but never # on the public runners diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 6d6f4d02562d5..4e6c7c83f4dc4 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -43,19 +43,19 @@ jobs: with: fetch-depth: 2 persist-credentials: false + - name: "Setup python" + uses: actions/setup-python@v2 + with: + # We do not have output from selective checks yet, so we need to hardcode python + python-version: 3.7 + cache: 'pip' + cache-dependency-path: ./dev/breeze/setup* + - run: ./scripts/ci/install_breeze.sh - name: Selective checks id: selective-checks env: - EVENT_NAME: ${{ github.event_name }} - TARGET_COMMIT_SHA: ${{ github.sha }} - run: | - if [[ ${EVENT_NAME} == "pull_request" ]]; then - # Run selective checks - ./scripts/ci/selective_ci_checks.sh "${TARGET_COMMIT_SHA}" - else - # Run all checks - ./scripts/ci/selective_ci_checks.sh - fi + COMMIT_REF: "${{ github.sha }}" + run: breeze selective-check analyze: name: Analyze diff --git a/BREEZE.rst b/BREEZE.rst index 7ce48270d02ff..64cbd2eef94d7 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -565,13 +565,17 @@ Configuration and maintenance * Cleanup breeze with ``breeze cleanup`` command * Self-upgrade breeze with ``breeze self-upgrade`` command * Setup autocomplete for Breeze with ``breeze setup-autocomplete`` command -* Checking available resources for docker with ``breeze resource-check`` command -* Freeing space needed to run CI tests with ``breeze free-space`` command -* Fixing ownership of files in your repository with ``breeze fix-ownership`` command * Print Breeze version with ``breeze version`` command * Outputs hash of commands defined by ``breeze`` with ``command-hash-export`` (useful to avoid needless regeneration of Breeze images) +CI tasks +-------- +* Freeing space needed to run CI tests with ``breeze free-space`` command +* Fixing ownership of files in your repository with ``breeze fix-ownership`` command +* Checking available resources for docker with ``breeze resource-check`` command +* Deciding 
which tests should be run with ``breeze selective-check`` command + Release tasks ------------- @@ -1295,8 +1299,8 @@ command but it is very similar to current ``breeze`` command): -Resource check -============== +Running resource check +---------------------- Breeze requires certain resources to be available - disk, memory, CPU. When you enter Breeze's shell, the resources are checked and information if there is enough resources is displayed. However you can @@ -1310,7 +1314,7 @@ Those are all available flags of ``resource-check`` command: Freeing the space -================= +----------------- When our CI runs a job, it needs all memory and disk it can have. We have a Breeze command that frees the memory and disk space used. You can also use it clear space locally but it performs a few operations @@ -1323,8 +1327,26 @@ Those are all available flags of ``free-space`` command: :alt: Breeze free-space +Selective check +--------------- + +When our CI runs a job, it needs to decide which tests to run, whether to build images and how much the test +should be run on multiple combinations of Python, Kubernetes, Backend versions. In order to optimize time +needed to run the CI Builds. You can also use the tool to test what tests will be run when you provide +a specific commit that Breeze should run the tests on. + +More details about the algorithm used to pick the right tests can be +found in `Selective Checks `_. + +Those are all available flags of ``selective-check`` command: + +.. image:: ./images/breeze/output-selective-check.svg + :width: 100% + :alt: Breeze selective-check + + Tracking backtracking issues for CI builds -========================================== +------------------------------------------ When our CI runs a job, we automatically upgrade our dependencies in the ``main`` build. However, this might lead to conflicts and ``pip`` backtracking for a long time (possibly forever) for dependency resolution. diff --git a/SELECTIVE_CHECKS.md b/SELECTIVE_CHECKS.md deleted file mode 100644 index 3a92d9c817987..0000000000000 --- a/SELECTIVE_CHECKS.md +++ /dev/null @@ -1,144 +0,0 @@ - - -# Selective CI Checks - -In order to optimise our CI jobs, we've implemented optimisations to only run selected checks for some -kind of changes. The logic implemented reflects the internal architecture of Airflow 2.0 packages -and it helps to keep down both the usage of jobs in GitHub Actions as well as CI feedback time to -contributors in case of simpler changes. - -We have the following test types (separated by packages in which they are): - -* Always - those are tests that should be always executed (always folder) -* Core - for the core Airflow functionality (core folder) -* API - Tests for the Airflow API (api and api_connexion folders) -* CLI - Tests for the Airflow CLI (cli folder) -* WWW - Tests for the Airflow webserver (www folder) -* Providers - Tests for all Providers of Airflow (providers folder) -* Other - all other tests (all other folders that are not part of any of the above) - -We also have several special kinds of tests that are not separated by packages but they are marked with -pytest markers. They can be found in any of those packages and they can be selected by the appropriate -pytest custom command line options. See `TESTING.rst `_ for details but those are: - -* Integration - tests that require external integration images running in docker-compose -* Quarantined - tests that are flaky and need to be fixed -* Postgres - tests that require Postgres database. 
They are only run when backend is Postgres -* MySQL - tests that require MySQL database. They are only run when backend is MySQL - -Even if the types are separated, In case they share the same backend version/python version, they are -run sequentially in the same job, on the same CI machine. Each of them in a separate `docker run` command -and with additional docker cleaning between the steps to not fall into the trap of exceeding resource -usage in one big test run, but also not to increase the number of jobs per each Pull Request. - -The logic implemented for the changes works as follows: - -1) In case of direct push (so when PR gets merged) or scheduled run, we always run all tests and checks. - This is in order to make sure that the merge did not miss anything important. The remainder of the logic - is executed only in case of Pull Requests. We do not add providers tests in case DEFAULT_BRANCH is - different than main, because providers are only important in main branch and PRs to main branch. - -2) We retrieve which files have changed in the incoming Merge Commit (github.sha is a merge commit - automatically prepared by GitHub in case of Pull Request, so we can retrieve the list of changed - files from that commit directly). - -3) If any of the important, environment files changed (Dockerfile, ci scripts, setup.py, GitHub workflow - files), then we again run all tests and checks. Those are cases where the logic of the checks changed - or the environment for the checks changed so we want to make sure to check everything. We do not add - providers tests in case DEFAULT_BRANCH is different than main, because providers are only - important in main branch and PRs to main branch. - -4) If any of py files changed: we need to have CI image and run full static checks so we enable image building - -5) If any of docs changed: we need to have CI image so we enable image building - -6) If any of chart files changed, we need to run helm tests so we enable helm unit tests - -7) If any of API files changed, we need to run API tests so we enable them - -8) If any of the relevant source files that trigger the tests have changed at all. Those are airflow - sources, chart, tests and kubernetes_tests. If any of those files changed, we enable tests and we - enable image building, because the CI images are needed to run tests. - -9) Then we determine which types of the tests should be run. We count all the changed files in the - relevant airflow sources (airflow, chart, tests, kubernetes_tests) first and then we count how many - files changed in different packages: - - * in any case tests in `Always` folder are run. Those are special tests that should be run any time - modifications to any Python code occurs. Example test of this type is verifying proper structure of - the project including proper naming of all files. - * if any of the Airflow API files changed we enable `API` test type - * if any of the Airflow CLI files changed we enable `CLI` test type and Kubernetes tests (the - K8S tests depend on CLI changes as helm chart uses CLI to run Airflow). - * if this is a main branch and if any of the Provider files changed we enable `Providers` test type - * if any of the WWW files changed we enable `WWW` test type - * if any of the Kubernetes files changed we enable `Kubernetes` test type - * Then we subtract count of all the `specific` above per-type changed files from the count of - all changed files. 
In case there are any files changed, then we assume that some unknown files - changed (likely from the core of airflow) and in this case we enable all test types above and the - Core test types - simply because we do not want to risk to miss anything. - * In all cases where tests are enabled we also add Integration and - depending on - the backend used = Postgres or MySQL types of tests. - -10) Quarantined tests are always run when tests are run - we need to run them often to observe how - often they fail so that we can decide to move them out of quarantine. Details about the - Quarantined tests are described in `TESTING.rst `_ - -11) There is a special case of static checks. In case the above logic determines that the CI image - needs to be built, we run long and more comprehensive version of static checks - including - Mypy, Flake8. And those tests are run on all files, no matter how many files changed. - In case the image is not built, we run only simpler set of changes - the longer static checks - that require CI image are skipped, and we only run the tests on the files that changed in the incoming - commit - unlike flake8/mypy, those static checks are per-file based and they should not miss any - important change. - -Similarly to selective tests we also run selective security scans. In Pull requests, -the Python scan will only run when there is a python code change and JavaScript scan will only run if -there is a JavaScript or `yarn.lock` file change. For main builds, all scans are always executed. - -The selective check algorithm is shown here: - - -````mermaid -flowchart TD -A(PR arrives)-->B[Selective Check] -B-->C{Direct push merge?} -C-->|Yes| N[Enable images] -N-->D(Run Full Test
+Quarantined
Run full static checks) -C-->|No| E[Retrieve changed files] -E-->F{Environment files changed?} -F-->|Yes| N -F-->|No| G{Docs changed} -G-->|Yes| O[Enable images building] -O-->I{Chart files changed?} -G-->|No| I -I-->|Yes| P[Enable helm tests] -P-->J{API files changed} -I-->|No| J -J-->|Yes| Q[Enable API tests] -Q-->H{Sources changed?} -J-->|No| H -H-->|Yes| R[Enable Pytests] -R-->K[Determine test type] -K-->S{Core files changed} -S-->|Yes| N -S-->|No| M(Run selected test+
Integration, Quarantined
Full static checks) -H-->|No| L[Skip running test
Run subset of static checks] -``` diff --git a/dev/breeze/README.md b/dev/breeze/README.md index 14a9f089a4834..380eb402b83f4 100644 --- a/dev/breeze/README.md +++ b/dev/breeze/README.md @@ -52,6 +52,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT. --------------------------------------------------------------------------------------------------------- -Package config hash: a80a853b2c32c284a68ccd6d468804b892a69f14d2ad1886bdaa892755cf6262660e2b9fc582bcae27ae478910055267a76edea2df658196198a0365150e93e5 +Package config hash: 7279229e03b197f2bbd10ebb7b313f67bba3a704735d3688652efc5bdc1b3a60f2d1e0a144c89a2ecd11268b06888c5302a8774a8f392dc383bb940c99521db3 --------------------------------------------------------------------------------------------------------- diff --git a/dev/breeze/SELECTIVE_CHECKS.md b/dev/breeze/SELECTIVE_CHECKS.md new file mode 100644 index 0000000000000..4504f8d9cd9cd --- /dev/null +++ b/dev/breeze/SELECTIVE_CHECKS.md @@ -0,0 +1,98 @@ + + + + +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* + +- [Selective CI Checks](#selective-ci-checks) + +# Selective CI Checks + +In order to optimise our CI jobs, we've implemented optimisations to only run selected checks for some +kind of changes. The logic implemented reflects the internal architecture of Airflow 2.0 packages, +and it helps to keep down both the usage of jobs in GitHub Actions and CI feedback time to +contributors in case of simpler changes. + +We have the following Groups of files for CI that determine which tests are run: + +* `Environment files` - if any of those changes, that forces 'run everything' mode, because changes there might + simply change the whole environment of what is going on in CI (Container image, dependencies) +* `Python and Javascript production files` - this area is useful in CodeQL Security scanning - if any of + the python or javascript files for airflow "production" changed, this means that the security scans should run +* `API tests and codegen files` - those are OpenAPI definition files that impact Open API specification and + determine that we should run dedicated API tests. +* `Helm files` - change in those files impacts helm "rendering" tests +* `Setup files` - change in the setup files indicates that we should run `upgrade to newer dependencies` +* `DOCs files` - change in those files indicate that we should run documentation builds +* `UI and WWW files` - those are files for the UI and WWW part of our UI (useful to determine if UI + tests should run) +* `Kubernetes files` - determine if any of Kubernetes related tests should be run +* `All Python files` - if none of the Python file changed, that indicates that we should not run unit tests +* `All source files` - if none of the sources change, that indicates that we should probably not build + an image and run any image-based static checks + +We have the following unit test types that can be selectively disabled/enabled based on the +content of the incoming PR: + +* Always - those are tests that should be always executed (always folder) +* Core - for the core Airflow functionality (core folder) +* API - Tests for the Airflow API (api and api_connexion folders) +* CLI - Tests for the Airflow CLI (cli folder) +* WWW - Tests for the Airflow webserver (www folder) +* Providers - Tests for all Providers of Airflow (providers folder) + +We also have several special kinds of tests that are not separated by packages, but they are marked with +pytest markers. 
They can be found in any of those packages and they can be selected by the appropriate +pytest custom command line options. See `TESTING.rst `_ for details but those are: + +* Integration - tests that require external integration images running in docker-compose +* Quarantined - tests that are flaky and need to be fixed +* Postgres - tests that require Postgres database. They are only run when backend is Postgres +* MySQL - tests that require MySQL database. They are only run when backend is MySQL + +Even if the types are separated, In case they share the same backend version/python version, they are +run sequentially in the same job, on the same CI machine. Each of them in a separate `docker run` command +and with additional docker cleaning between the steps to not fall into the trap of exceeding resource +usage in one big test run, but also not to increase the number of jobs per each Pull Request. + +The logic implements the following rules: + +* `Full tests` mode is enabled when the event is PUSH, or SCHEDULE or when "full tests needed" label is set. + That enables all matrix combinations of variables, and all possible tests +* Python, Kubernetes, Backend, Kind, Helm versions are limited to "defaults" only unless `Full tests` mode + is enabled. +* If "Commit" to work on cannot be determined, or `Full Test` mode is enabled or some of the important + environment files (setup.py, setup.cfg, Dockerfile, build scripts) changed - all unit tests are + executed - this is `run everything` mode. No further checks are performed. +* `Python scans`, `Javascript scans`, `API tests/codegen`, `UI`, `WWW`, `Kubernetes` tests and `DOC builds` + are enabled if any of the relevant files have been changed. +* `Helm` tests are run only if relevant files have been changed and if current branch is `main`. +* If no Source files are changed - no tests are run and no further rules below are checked. +* `Image building` is enabled if either test are run, docs are build or kubernetes tests are run. All those + need `CI` or `PROD` images to be built. +* The specific unit test type is enabled only if changed files match the expected patterns for each type + (`API`, `CLI`, `WWW`, `Providers`). The `Always` test type is added always if any unit tests are run. + `Providers` tests are removed if current branch is different than `main` +* If there are no files left in sources after matching the test types and Kubernetes files, + then apparently some Core/Other files have been changed. This automatically adds all test + types to execute. This is done because changes in core might impact all the other test types. +* if `Image building` is disabled, only basic pre-commits are enabled - no 'image-depending` pre-commits + are enabled. +* If there are some setup files changed, `upgrade to newer dependencies` is enabled. diff --git a/dev/breeze/setup.cfg b/dev/breeze/setup.cfg index c974560561053..7db0782695ebd 100644 --- a/dev/breeze/setup.cfg +++ b/dev/breeze/setup.cfg @@ -53,6 +53,7 @@ package_dir= =src packages = find: install_requires = + cached_property>=1.5.0;python_version<="3.7" click inputimeout importlib-metadata>=4.4; python_version < "3.8" diff --git a/dev/breeze/src/airflow_breeze/commands/ci_commands.py b/dev/breeze/src/airflow_breeze/commands/ci_commands.py new file mode 100644 index 0000000000000..c8260698d71aa --- /dev/null +++ b/dev/breeze/src/airflow_breeze/commands/ci_commands.py @@ -0,0 +1,237 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import sys +from typing import Optional, Tuple + +import click + +from airflow_breeze.commands.main_command import main +from airflow_breeze.global_constants import ( + DEFAULT_PYTHON_MAJOR_MINOR_VERSION, + MOUNT_ALL, + GithubEvents, + github_events, +) +from airflow_breeze.params.shell_params import ShellParams +from airflow_breeze.utils.common_options import ( + option_airflow_constraints_reference, + option_answer, + option_dry_run, + option_github_repository, + option_max_age, + option_python, + option_timezone, + option_updated_on_or_after, + option_verbose, +) +from airflow_breeze.utils.confirm import Answer, user_confirm +from airflow_breeze.utils.console import get_console +from airflow_breeze.utils.custom_param_types import BetterChoice +from airflow_breeze.utils.docker_command_utils import ( + check_docker_resources, + get_env_variables_for_docker_commands, + get_extra_docker_flags, + perform_environment_checks, +) +from airflow_breeze.utils.find_newer_dependencies import find_newer_dependencies +from airflow_breeze.utils.image import find_available_ci_image +from airflow_breeze.utils.run_utils import run_command + +CI_COMMANDS = { + "name": "CI commands", + "commands": [ + "fix-ownership", + "free-space", + "resource-check", + "selective-check", + "find-newer-dependencies", + ], +} + +CI_PARAMETERS = { + "breeze selective-check": [ + { + "name": "Selective check flags", + "options": [ + "--commit-ref", + "--pr-labels", + "--default-branch", + "--github-event-name", + ], + } + ], + "breeze find-newer-dependencies": [ + { + "name": "Find newer dependencies flags", + "options": [ + "--python", + "--timezone", + "--constraints-branch", + "--updated-on-or-after", + "--max-age", + ], + } + ], +} + + +@main.command(name="free-space", help="Free space for jobs run in CI.") +@option_verbose +@option_dry_run +@option_answer +def free_space(verbose: bool, dry_run: bool, answer: str): + if user_confirm("Are you sure to run free-space and perform cleanup?") == Answer.YES: + run_command(["sudo", "swapoff", "-a"], verbose=verbose, dry_run=dry_run) + run_command(["sudo", "rm", "-f", "/swapfile"], verbose=verbose, dry_run=dry_run) + run_command(["sudo", "apt-get", "clean"], verbose=verbose, dry_run=dry_run, check=False) + run_command( + ["docker", "system", "prune", "--all", "--force", "--volumes"], verbose=verbose, dry_run=dry_run + ) + run_command(["df", "-h"], verbose=verbose, dry_run=dry_run) + run_command(["docker", "logout", "ghcr.io"], verbose=verbose, dry_run=dry_run, check=False) + + +@main.command(name="resource-check", help="Check if available docker resources are enough.") +@option_verbose +@option_dry_run +def resource_check(verbose: bool, dry_run: bool): + perform_environment_checks(verbose=verbose) + shell_params = ShellParams(verbose=verbose, python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION) + 
check_docker_resources(shell_params.airflow_image_name, verbose=verbose, dry_run=dry_run) + + +@main.command(name="fix-ownership", help="Fix ownership of source files to be same as host user.") +@option_github_repository +@option_verbose +@option_dry_run +def fix_ownership(github_repository: str, verbose: bool, dry_run: bool): + perform_environment_checks(verbose=verbose) + shell_params = find_available_ci_image(github_repository, dry_run, verbose) + extra_docker_flags = get_extra_docker_flags(MOUNT_ALL) + env = get_env_variables_for_docker_commands(shell_params) + cmd = [ + "docker", + "run", + "-t", + *extra_docker_flags, + "--pull", + "never", + shell_params.airflow_image_name_with_tag, + "/opt/airflow/scripts/in_container/run_fix_ownership.sh", + ] + run_command( + cmd, verbose=verbose, dry_run=dry_run, text=True, env=env, check=False, enabled_output_group=True + ) + # Always succeed + sys.exit(0) + + +def get_changed_files(commit_ref: Optional[str], dry_run: bool, verbose: bool) -> Tuple[str, ...]: + if commit_ref is None: + return () + cmd = [ + "git", + "diff-tree", + "--no-commit-id", + "--name-only", + "-r", + commit_ref + "^", + commit_ref, + ] + result = run_command(cmd, dry_run=dry_run, verbose=verbose, check=False, capture_output=True, text=True) + if result.returncode != 0: + get_console().print( + f"[warning] Error when running diff-tree command [/]\n{result.stdout}\n{result.stderr}" + ) + return () + changed_files = tuple(result.stdout.splitlines()) if result.stdout else () + get_console().print("\n[info]Changed files:[/]\n") + get_console().print(changed_files) + get_console().print() + return changed_files + + +@main.command(name="selective-check", help="Checks what kind of tests should be run for an incoming commit.") +@click.option( + '--commit-ref', + help="Commit-ish reference to the commit that should be checked", + envvar='COMMIT_REF', +) +@click.option( + '--pr-labels', + help="Space-separate list of labels which are valid for the PR", + default="", + envvar="PR_LABELS", +) +@click.option( + '--default-branch', + help="Branch against which the PR should be run", + default="main", + envvar="DEFAULT_BRANCH", + show_default=True, +) +@click.option( + '--github-event-name', + type=BetterChoice(github_events()), + default=github_events()[0], + help="Name of the GitHub event that triggered the check", + envvar="GITHUB_EVENT_NAME", + show_default=True, +) +@option_verbose +@option_dry_run +def selective_check( + commit_ref: Optional[str], + pr_labels: str, + default_branch: str, + github_event_name: str, + verbose: bool, + dry_run: bool, +): + from airflow_breeze.utils.selective_checks import SelectiveChecks + + github_event = GithubEvents(github_event_name) + if github_event == GithubEvents.PULL_REQUEST: + changed_files = get_changed_files(commit_ref=commit_ref, dry_run=dry_run, verbose=verbose) + else: + changed_files = () + sc = SelectiveChecks( + commit_ref=commit_ref, + files=changed_files, + default_branch=default_branch, + pr_labels=tuple(" ".split(pr_labels)) if pr_labels else (), + github_event=github_event, + ) + print(str(sc)) + + +@main.command(name="find-newer-dependencies", help="Finds which dependencies are being upgraded.") +@option_timezone +@option_airflow_constraints_reference +@option_python +@option_updated_on_or_after +@option_max_age +def breeze_find_newer_dependencies( + airflow_constraints_reference: str, python: str, timezone: str, updated_on_or_after: str, max_age: int +): + return find_newer_dependencies( + 
constraints_branch=airflow_constraints_reference, + python=python, + timezone=timezone, + updated_on_or_after=updated_on_or_after, + max_age=max_age, + ) diff --git a/dev/breeze/src/airflow_breeze/commands/configuration_and_maintenance_commands.py b/dev/breeze/src/airflow_breeze/commands/configuration_and_maintenance_commands.py index c11fa7882d9fc..20741e9986c87 100644 --- a/dev/breeze/src/airflow_breeze/commands/configuration_and_maintenance_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/configuration_and_maintenance_commands.py @@ -28,8 +28,6 @@ from airflow_breeze import NAME, VERSION from airflow_breeze.commands.main_command import main -from airflow_breeze.global_constants import DEFAULT_PYTHON_MAJOR_MINOR_VERSION, MOUNT_ALL -from airflow_breeze.params.shell_params import ShellParams from airflow_breeze.utils.cache import check_if_cache_exists, delete_cache, touch_cache_file from airflow_breeze.utils.common_options import ( option_answer, @@ -44,13 +42,7 @@ ) from airflow_breeze.utils.confirm import STANDARD_TIMEOUT, Answer, user_confirm from airflow_breeze.utils.console import get_console -from airflow_breeze.utils.docker_command_utils import ( - check_docker_resources, - get_env_variables_for_docker_commands, - get_extra_docker_flags, - perform_environment_checks, -) -from airflow_breeze.utils.image import find_available_ci_image +from airflow_breeze.utils.docker_command_utils import perform_environment_checks from airflow_breeze.utils.path_utils import ( AIRFLOW_SOURCES_ROOT, BUILD_CACHE_DIR, @@ -72,9 +64,6 @@ "self-upgrade", "setup-autocomplete", "config", - "resource-check", - "free-space", - "fix-ownership", "regenerate-command-images", "command-hash-export", "version", @@ -394,31 +383,6 @@ def get_status(file: str): get_console().print() -@main.command(name="free-space", help="Free space for jobs run in CI.") -@option_verbose -@option_dry_run -@option_answer -def free_space(verbose: bool, dry_run: bool, answer: str): - if user_confirm("Are you sure to run free-space and perform cleanup?") == Answer.YES: - run_command(["sudo", "swapoff", "-a"], verbose=verbose, dry_run=dry_run) - run_command(["sudo", "rm", "-f", "/swapfile"], verbose=verbose, dry_run=dry_run) - run_command(["sudo", "apt-get", "clean"], verbose=verbose, dry_run=dry_run, check=False) - run_command( - ["docker", "system", "prune", "--all", "--force", "--volumes"], verbose=verbose, dry_run=dry_run - ) - run_command(["df", "-h"], verbose=verbose, dry_run=dry_run) - run_command(["docker", "logout", "ghcr.io"], verbose=verbose, dry_run=dry_run, check=False) - - -@main.command(name="resource-check", help="Check if available docker resources are enough.") -@option_verbose -@option_dry_run -def resource_check(verbose: bool, dry_run: bool): - perform_environment_checks(verbose=verbose) - shell_params = ShellParams(verbose=verbose, python=DEFAULT_PYTHON_MAJOR_MINOR_VERSION) - check_docker_resources(shell_params.airflow_image_name, verbose=verbose, dry_run=dry_run) - - def dict_hash(dictionary: Dict[str, Any]) -> str: """MD5 hash of a dictionary. 
Sorted and dumped via json to account for random sequence)""" dhash = hashlib.md5() @@ -442,32 +406,6 @@ def command_hash_export(verbose: bool, output: IO): output.write(dict_hash(the_context_dict) + "\n") -@main.command(name="fix-ownership", help="Fix ownership of source files to be same as host user.") -@option_github_repository -@option_verbose -@option_dry_run -def fix_ownership(github_repository: str, verbose: bool, dry_run: bool): - perform_environment_checks(verbose=verbose) - shell_params = find_available_ci_image(github_repository, dry_run, verbose) - extra_docker_flags = get_extra_docker_flags(MOUNT_ALL) - env = get_env_variables_for_docker_commands(shell_params) - cmd = [ - "docker", - "run", - "-t", - *extra_docker_flags, - "--pull", - "never", - shell_params.airflow_image_name_with_tag, - "/opt/airflow/scripts/in_container/run_fix_ownership.sh", - ] - run_command( - cmd, verbose=verbose, dry_run=dry_run, text=True, env=env, check=False, enabled_output_group=True - ) - # Always succeed - sys.exit(0) - - def write_to_shell(command_to_execute: str, dry_run: bool, script_path: str, force_setup: bool) -> bool: skip_check = False script_path_file = Path(script_path) diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index caf43d47a409b..17bf70b3af9d1 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -45,14 +45,11 @@ option_github_repository, option_image_tag, option_installation_package_format, - option_max_age, option_package_format, option_parallelism, option_python, option_python_versions, option_run_in_parallel, - option_timezone, - option_updated_on_or_after, option_use_airflow_version, option_use_packages_from_dist, option_verbose, @@ -66,7 +63,6 @@ get_extra_docker_flags, perform_environment_checks, ) -from airflow_breeze.utils.find_newer_dependencies import find_newer_dependencies from airflow_breeze.utils.parallel import check_async_run_results from airflow_breeze.utils.python_versions import get_python_version_list from airflow_breeze.utils.run_utils import RunCommandResult, run_command @@ -140,18 +136,6 @@ ], } ], - "breeze find-newer-dependencies": [ - { - "name": "Find newer dependencies flags", - "options": [ - "--python", - "--timezone", - "--constraints-branch", - "--updated-on-or-after", - "--max-age", - ], - } - ], } RELEASE_MANAGEMENT_COMMANDS = { @@ -163,7 +147,6 @@ "prepare-airflow-package", "release-prod-images", "generate-constraints", - "find-newer-dependencies", ], } @@ -767,21 +750,3 @@ def release_prod_images( verbose=verbose, dry_run=dry_run, ) - - -@main.command(name="find-newer-dependencies", help="Finds which dependencies are being upgraded.") -@option_timezone -@option_airflow_constraints_reference -@option_python -@option_updated_on_or_after -@option_max_age -def breeze_find_newer_dependencies( - airflow_constraints_reference: str, python: str, timezone: str, updated_on_or_after: str, max_age: int -): - return find_newer_dependencies( - constraints_branch=airflow_constraints_reference, - python=python, - timezone=timezone, - updated_on_or_after=updated_on_or_after, - max_age=max_age, - ) diff --git a/dev/breeze/src/airflow_breeze/commands/testing_commands.py b/dev/breeze/src/airflow_breeze/commands/testing_commands.py index 3946acd2ab3a9..45df368afa89c 100644 --- a/dev/breeze/src/airflow_breeze/commands/testing_commands.py +++ 
b/dev/breeze/src/airflow_breeze/commands/testing_commands.py @@ -28,7 +28,7 @@ import click from airflow_breeze.commands.main_command import main -from airflow_breeze.global_constants import ALLOWED_TEST_TYPES +from airflow_breeze.global_constants import ALLOWED_TEST_TYPE_CHOICES from airflow_breeze.params.build_prod_params import BuildProdParams from airflow_breeze.params.shell_params import ShellParams from airflow_breeze.utils.ci_group import ci_group @@ -249,7 +249,7 @@ def run_with_progress( "--test-type", help="Type of test to run.", default="All", - type=BetterChoice(ALLOWED_TEST_TYPES), + type=BetterChoice(ALLOWED_TEST_TYPE_CHOICES), ) @option_db_reset @click.argument('extra_pytest_args', nargs=-1, type=click.UNPROCESSED) diff --git a/dev/breeze/src/airflow_breeze/configure_rich_click.py b/dev/breeze/src/airflow_breeze/configure_rich_click.py index bc1963684a8a4..8933d57dbeea3 100644 --- a/dev/breeze/src/airflow_breeze/configure_rich_click.py +++ b/dev/breeze/src/airflow_breeze/configure_rich_click.py @@ -14,14 +14,13 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - - from airflow_breeze.utils import recording # isort:skip # noqa try: # We handle ImportError so that click autocomplete works import rich_click as click + from airflow_breeze.commands.ci_commands import CI_COMMANDS, CI_PARAMETERS from airflow_breeze.commands.ci_image_commands import CI_IMAGE_TOOLS_COMMANDS, CI_IMAGE_TOOLS_PARAMETERS from airflow_breeze.commands.configuration_and_maintenance_commands import ( CONFIGURATION_AND_MAINTENANCE_COMMANDS, @@ -52,6 +51,7 @@ **CONFIGURATION_AND_MAINTENANCE_PARAMETERS, **CI_IMAGE_TOOLS_PARAMETERS, **PRODUCTION_IMAGE_TOOLS_PARAMETERS, + **CI_PARAMETERS, **RELEASE_MANAGEMENT_PARAMETERS, } click.rich_click.COMMAND_GROUPS = { @@ -61,6 +61,7 @@ CONFIGURATION_AND_MAINTENANCE_COMMANDS, CI_IMAGE_TOOLS_COMMANDS, PRODUCTION_IMAGE_TOOLS_COMMANDS, + CI_COMMANDS, RELEASE_MANAGEMENT_COMMANDS, ] } diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index 7eeaf1c8605d6..f6727d7a0a8b1 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -17,8 +17,11 @@ """ Global constants that are used by all other Breeze components. 
""" +from __future__ import annotations + import platform -from typing import List +from enum import Enum +from functools import lru_cache from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT @@ -67,21 +70,36 @@ ALLOWED_POSTGRES_VERSIONS = ['10', '11', '12', '13', '14'] ALLOWED_MYSQL_VERSIONS = ['5.7', '8'] ALLOWED_MSSQL_VERSIONS = ['2017-latest', '2019-latest'] -ALLOWED_TEST_TYPES = [ - 'All', - 'Always', - 'Core', - 'Providers', - 'API', - 'CLI', - 'Integration', - 'Other', - 'WWW', - 'Postgres', - 'MySQL', - 'Helm', - 'Quarantined', + + +@lru_cache(maxsize=None) +def all_selective_test_types() -> tuple[str, ...]: + return tuple(sorted(e.value for e in SelectiveUnitTestTypes)) + + +class SelectiveUnitTestTypes(Enum): + ALWAYS = 'Always' + API = 'API' + CLI = 'CLI' + CORE = 'Core' + OTHER = 'Other' + INTEGRATION = 'Integration' + PROVIDERS = 'Providers' + WWW = 'WWW' + + +ALLOWED_TEST_TYPE_CHOICES = [ + "All", + "Always", + *all_selective_test_types(), + "Helm", + "Postgres", + "MySQL", + "Integration", + "Other", + "Quarantine", ] + ALLOWED_PACKAGE_FORMATS = ['wheel', 'sdist', 'both'] ALLOWED_INSTALLATION_PACKAGE_FORMATS = ['wheel', 'sdist'] ALLOWED_INSTALLATION_METHODS = ['.', 'apache-airflow'] @@ -114,7 +132,7 @@ ] -def get_available_packages(short_version=False) -> List[str]: +def get_available_packages(short_version=False) -> list[str]: docs_path_content = (AIRFLOW_SOURCES_ROOT / 'docs').glob('*/') available_packages = [x.name for x in docs_path_content if x.is_dir()] package_list = list(set(available_packages) - set(EXCLUDE_DOCS_PACKAGE_FOLDER)) @@ -153,8 +171,12 @@ def get_default_platform_machine() -> str: ALL_PYTHON_MAJOR_MINOR_VERSIONS = ['3.7', '3.8', '3.9', '3.10'] CURRENT_PYTHON_MAJOR_MINOR_VERSIONS = ['3.7', '3.8', '3.9', '3.10'] CURRENT_POSTGRES_VERSIONS = ['10', '11', '12', '13', '14'] +DEFAULT_POSTGRES_VERSION = CURRENT_POSTGRES_VERSIONS[0] CURRENT_MYSQL_VERSIONS = ['5.7', '8'] +DEFAULT_MYSQL_VERSION = CURRENT_MYSQL_VERSIONS[0] CURRENT_MSSQL_VERSIONS = ['2017-latest', '2019-latest'] +DEFAULT_MSSQL_VERSION = CURRENT_MSSQL_VERSIONS[0] + DB_RESET = False START_AIRFLOW = "false" LOAD_EXAMPLES = False @@ -225,10 +247,10 @@ def get_airflow_extras(): CURRENT_HELM_VERSIONS = ['v3.6.3'] CURRENT_EXECUTORS = ['KubernetesExecutor'] -DEFAULT_KUBERNETES_MODES = CURRENT_KUBERNETES_MODES[0] -DEFAULT_KUBERNETES_VERSIONS = CURRENT_KUBERNETES_VERSIONS[0] -DEFAULT_KIND_VERSIONS = CURRENT_KIND_VERSIONS[0] -DEFAULT_HELM_VERSIONS = CURRENT_HELM_VERSIONS[0] +DEFAULT_KUBERNETES_MODE = CURRENT_KUBERNETES_MODES[0] +DEFAULT_KUBERNETES_VERSION = CURRENT_KUBERNETES_VERSIONS[0] +DEFAULT_KIND_VERSION = CURRENT_KIND_VERSIONS[0] +DEFAULT_HELM_VERSION = CURRENT_HELM_VERSIONS[0] DEFAULT_EXECUTOR = CURRENT_EXECUTORS[0] # Initialize image build variables - Have to check if this has to go to ci dataclass @@ -276,3 +298,16 @@ def get_airflow_extras(): "virtualenv", # END OF EXTRAS LIST UPDATED BY PRE COMMIT ] + + +class GithubEvents(Enum): + PULL_REQUEST = "pull_request" + PULL_REQUEST_REVIEW = "pull_request_review" + PULL_REQUEST_TARGET = "pull_request_target" + PUSH = "push" + SCHEDULE = "schedule" + + +@lru_cache(maxsize=None) +def github_events() -> list[str]: + return [e.value for e in GithubEvents] diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py b/dev/breeze/src/airflow_breeze/utils/selective_checks.py new file mode 100644 index 0000000000000..22e74f455301f --- /dev/null +++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py @@ -0,0 +1,480 @@ +# Licensed to the 
Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import sys +from enum import Enum + +if sys.version_info >= (3, 8): + from functools import cached_property +else: + from cached_property import cached_property + +from functools import lru_cache +from re import match +from typing import Any, Dict, List, TypeVar + +from airflow_breeze.global_constants import ( + ALL_PYTHON_MAJOR_MINOR_VERSIONS, + CURRENT_HELM_VERSIONS, + CURRENT_KIND_VERSIONS, + CURRENT_KUBERNETES_MODES, + CURRENT_KUBERNETES_VERSIONS, + CURRENT_MSSQL_VERSIONS, + CURRENT_MYSQL_VERSIONS, + CURRENT_POSTGRES_VERSIONS, + CURRENT_PYTHON_MAJOR_MINOR_VERSIONS, + DEFAULT_HELM_VERSION, + DEFAULT_KIND_VERSION, + DEFAULT_KUBERNETES_MODE, + DEFAULT_KUBERNETES_VERSION, + DEFAULT_MSSQL_VERSION, + DEFAULT_MYSQL_VERSION, + DEFAULT_POSTGRES_VERSION, + DEFAULT_PYTHON_MAJOR_MINOR_VERSION, + GithubEvents, + SelectiveUnitTestTypes, + all_selective_test_types, +) +from airflow_breeze.utils.console import get_console + +FULL_TESTS_NEEDED_LABEL = "full tests needed" + + +def get_ga_output(name: str, value: Any) -> str: + output_name = name.replace('_', '-') + printed_value = str(value).lower() if isinstance(value, bool) else value + get_console().print(f"[info]{output_name}[/] = [green]{printed_value}[/]") + return f"::set-output name={output_name}::{printed_value}" + + +class FileGroupForCi(Enum): + ENVIRONMENT_FILES = "environment_files" + PYTHON_PRODUCTION_FILES = "python_scans" + JAVASCRIPT_PRODUCTION_FILES = "javascript_scans" + API_TEST_FILES = "api_test_files" + API_CODEGEN_FILES = "api_codegen_files" + HELM_FILES = "helm_files" + SETUP_FILES = "setup_files" + DOC_FILES = "doc_files" + UI_FILES = "ui_files" + WWW_FILES = "www_files" + KUBERNETES_FILES = "kubernetes_files" + ALL_PYTHON_FILES = "all_python_files" + ALL_SOURCE_FILES = "all_sources_for_tests" + + +T = TypeVar('T', FileGroupForCi, SelectiveUnitTestTypes) + + +class HashableDict(Dict[T, List[str]]): + def __hash__(self): + return hash(frozenset(self)) + + +CI_FILE_GROUP_MATCHES = HashableDict( + { + FileGroupForCi.ENVIRONMENT_FILES: [ + r"^.github/workflows", + r"^dev/breeze", + r"^Dockerfile", + r"^scripts", + r"^setup.py", + r"^setup.cfg", + ], + FileGroupForCi.PYTHON_PRODUCTION_FILES: [ + r"^airflow/.*\.py", + r"^setup.py", + ], + FileGroupForCi.JAVASCRIPT_PRODUCTION_FILES: [ + r"^airflow/.*\.[jt]sx?", + r"^airflow/.*\.lock", + ], + FileGroupForCi.API_TEST_FILES: [ + r"^airflow/api", + ], + FileGroupForCi.API_CODEGEN_FILES: [ + "^airflow/api_connexion/openapi/v1.yaml", + "^clients/gen", + ], + FileGroupForCi.HELM_FILES: [ + "^chart", + ], + FileGroupForCi.SETUP_FILES: [ + r"^pyproject.toml", + r"^setup.cfg", + r"^setup.py", + ], + FileGroupForCi.DOC_FILES: [ + r"^docs", + r"^airflow/.*\.py$", + r"^chart", + r"^providers", + 
r"^CHANGELOG\.txt", + r"^airflow/config_templates/config\.yml", + r"^chart/RELEASE_NOTES\.txt", + r"^chart/values\.schema\.json", + r"^chart/values\.json", + ], + FileGroupForCi.UI_FILES: [ + r"^airflow/ui/.*\.[tj]sx?$", + r"^airflow/ui/[^/]+\.json$", + r"^airflow/ui/.*\.lock$", + ], + FileGroupForCi.WWW_FILES: [ + r"^airflow/www/.*\.js[x]?$", + r"^airflow/www/[^/]+\.json$", + r"^airflow/www/.*\.lock$", + ], + FileGroupForCi.KUBERNETES_FILES: [ + r"^chart", + r"^kubernetes_tests", + r"^airflow/providers/cncf/kubernetes/", + r"^tests/providers/cncf/kubernetes/", + ], + FileGroupForCi.ALL_PYTHON_FILES: [ + r"\.py$", + ], + FileGroupForCi.ALL_SOURCE_FILES: [ + "^.pre-commit-config.yaml$", + "^airflow", + "^chart", + "^tests", + "^kubernetes_tests", + ], + } +) + + +TEST_TYPE_MATCHES = HashableDict( + { + SelectiveUnitTestTypes.API: [ + r"^airflow/api", + r"^airflow/api_connexion", + r"^tests/api", + r"^tests/api_connexion", + ], + SelectiveUnitTestTypes.CLI: [ + r"^airflow/cli", + r"^tests/cli", + ], + SelectiveUnitTestTypes.PROVIDERS: [ + "^airflow/providers/", + "^tests/providers/", + ], + SelectiveUnitTestTypes.WWW: ["^airflow/www", "^tests/www", "^airflow/ui"], + } +) + + +class SelectiveChecks: + __HASHABLE_FIELDS = {'_files', '_default_branch', '_commit_ref', "_pr_labels", "_github_event"} + + def __init__( + self, + files: tuple[str, ...] = (), + default_branch="main", + commit_ref: str | None = None, + pr_labels: tuple[str, ...] = (), + github_event: GithubEvents = GithubEvents.PULL_REQUEST, + ): + self._files = files + self._default_branch = default_branch + self._commit_ref = commit_ref + self._pr_labels = pr_labels + self._github_event = github_event + + def __important_attributes(self) -> tuple[Any, ...]: + return tuple(getattr(self, f) for f in self.__HASHABLE_FIELDS) + + def __hash__(self): + return hash(self.__important_attributes()) + + def __eq__(self, other): + return isinstance(other, SelectiveChecks) and all( + [getattr(other, f) == getattr(self, f) for f in self.__HASHABLE_FIELDS] + ) + + def __str__(self) -> str: + output = [] + for field_name in dir(self): + if not field_name.startswith('_'): + output.append(get_ga_output(field_name, getattr(self, field_name))) + return "\n".join(output) + + default_python_version = DEFAULT_PYTHON_MAJOR_MINOR_VERSION + default_postgres_version = DEFAULT_POSTGRES_VERSION + default_mysql_version = DEFAULT_MYSQL_VERSION + default_mssql_version = DEFAULT_MSSQL_VERSION + + default_kubernetes_version = DEFAULT_KUBERNETES_VERSION + default_kind_version = DEFAULT_KIND_VERSION + default_helm_version = DEFAULT_HELM_VERSION + + @cached_property + def default_branch(self) -> str: + return self._default_branch + + @cached_property + def _full_tests_needed(self) -> bool: + if self._github_event in [GithubEvents.PUSH, GithubEvents.SCHEDULE]: + get_console().print(f"[warning]Full tests needed because event is {self._github_event}[/]") + return True + if FULL_TESTS_NEEDED_LABEL in self._pr_labels: + get_console().print(f"[warning]Full tests needed because labels are {self._pr_labels}[/]") + return True + return False + + @cached_property + def python_versions(self) -> list[str]: + return ( + CURRENT_PYTHON_MAJOR_MINOR_VERSIONS + if self._full_tests_needed + else [DEFAULT_PYTHON_MAJOR_MINOR_VERSION] + ) + + @cached_property + def python_versions_list_as_string(self) -> str: + return " ".join(self.python_versions) + + @cached_property + def all_python_versions(self) -> list[str]: + return ( + ALL_PYTHON_MAJOR_MINOR_VERSIONS + if self._run_everything or 
self._full_tests_needed + else [DEFAULT_PYTHON_MAJOR_MINOR_VERSION] + ) + + @cached_property + def all_python_versions_list_as_string(self) -> str: + return " ".join(self.all_python_versions) + + @cached_property + def kubernetes_modes(self): + return CURRENT_KUBERNETES_MODES if self._full_tests_needed else [DEFAULT_KUBERNETES_MODE] + + @cached_property + def postgres_versions(self) -> list[str]: + return CURRENT_POSTGRES_VERSIONS if self._full_tests_needed else [DEFAULT_POSTGRES_VERSION] + + @cached_property + def mysql_versions(self) -> list[str]: + return CURRENT_MYSQL_VERSIONS if self._full_tests_needed else [DEFAULT_MYSQL_VERSION] + + @cached_property + def mssql_versions(self) -> list[str]: + return CURRENT_MSSQL_VERSIONS if self._full_tests_needed else [DEFAULT_MSSQL_VERSION] + + @cached_property + def kind_versions(self) -> list[str]: + return CURRENT_KIND_VERSIONS + + @cached_property + def helm_versions(self) -> list[str]: + return CURRENT_HELM_VERSIONS + + @cached_property + def postgres_exclude(self) -> list[dict[str, str]]: + return [{"python-version": "3.7"}] if self._full_tests_needed else [] + + @cached_property + def mssql_exclude(self) -> list[dict[str, str]]: + return [{"python-version": "3.8"}] if self._full_tests_needed else [] + + @cached_property + def mysql_exclude(self) -> list[dict[str, str]]: + return [{"python-version": "3.10"}] if self._full_tests_needed else [] + + @cached_property + def sqlite_exclude(self) -> list[dict[str, str]]: + return [{"python-version": "3.9"}] if self._full_tests_needed else [] + + @cached_property + def kubernetes_versions(self) -> list[str]: + return CURRENT_KUBERNETES_VERSIONS if self._full_tests_needed else [DEFAULT_KUBERNETES_VERSION] + + @cached_property + def kubernetes_versions_list_as_string(self) -> str: + return " ".join(self.kubernetes_versions) + + def _match_files_with_regexps(self, matched_files, regexps): + for file in self._files: + for regexp in regexps: + if match(regexp, file): + matched_files.append(file) + break + + @lru_cache(maxsize=None) + def _matching_files(self, match_group: T, match_dict: dict[T, list[str]]) -> list[str]: + matched_files: list[str] = [] + regexps = match_dict[match_group] + self._match_files_with_regexps(matched_files, regexps) + count = len(matched_files) + if count > 0: + get_console().print(f"[warning]{match_group} matched {count} files.[/]") + get_console().print(matched_files) + else: + get_console().print(f"[warning]{match_group} did not match any file.[/]") + return matched_files + + @cached_property + def _run_everything(self) -> bool: + if not self._commit_ref: + get_console().print("[warning]Running everything as commit is missing[/]") + return True + if self._full_tests_needed: + get_console().print("[warning]Running everything as full tests are needed[/]") + return True + if len(self._matching_files(FileGroupForCi.ENVIRONMENT_FILES, CI_FILE_GROUP_MATCHES)) > 0: + get_console().print("[warning]Running everything because env files changed[/]") + return True + return False + + def _should_be_run(self, source_area: FileGroupForCi) -> bool: + if self._run_everything: + get_console().print(f"[warning]{source_area} enabled because we are running everything[/]") + return True + matched_files = self._matching_files(source_area, CI_FILE_GROUP_MATCHES) + if len(matched_files) > 0: + get_console().print( + f"[warning]{source_area} enabled because it matched {len(matched_files)} changed files[/]" + ) + return True + else: + get_console().print( + f"[warning]{source_area} disabled because it 
did not match any changed files[/]" + ) + return False + + @cached_property + def needs_python_scans(self) -> bool: + return self._should_be_run(FileGroupForCi.PYTHON_PRODUCTION_FILES) + + @cached_property + def needs_javascript_scans(self) -> bool: + return self._should_be_run(FileGroupForCi.JAVASCRIPT_PRODUCTION_FILES) + + @cached_property + def needs_api_tests(self) -> bool: + return self._should_be_run(FileGroupForCi.API_TEST_FILES) + + @cached_property + def needs_api_codegen(self) -> bool: + return self._should_be_run(FileGroupForCi.API_CODEGEN_FILES) + + @cached_property + def run_ui_tests(self) -> bool: + return self._should_be_run(FileGroupForCi.UI_FILES) + + @cached_property + def run_www_tests(self) -> bool: + return self._should_be_run(FileGroupForCi.WWW_FILES) + + @cached_property + def run_kubernetes_tests(self) -> bool: + return self._should_be_run(FileGroupForCi.KUBERNETES_FILES) + + @cached_property + def docs_build(self) -> bool: + return self._should_be_run(FileGroupForCi.DOC_FILES) + + @cached_property + def needs_helm_tests(self) -> bool: + return self._should_be_run(FileGroupForCi.HELM_FILES) and self._default_branch == "main" + + @cached_property + def run_tests(self) -> bool: + return self._should_be_run(FileGroupForCi.ALL_SOURCE_FILES) + + @cached_property + def image_build(self) -> bool: + return self.run_tests or self.docs_build or self.run_kubernetes_tests + + def _select_test_type_if_matching( + self, test_types: set[str], test_type: SelectiveUnitTestTypes + ) -> list[str]: + matched_files = self._matching_files(test_type, TEST_TYPE_MATCHES) + count = len(matched_files) + if count > 0: + test_types.add(test_type.value) + get_console().print(f"[warning]{test_type} added because it matched {count} files[/]") + return matched_files + + def _get_test_types_to_run(self) -> list[str]: + candidate_test_types: set[str] = {"Always"} + matched_files: set[str] = set() + matched_files.update( + self._select_test_type_if_matching(candidate_test_types, SelectiveUnitTestTypes.WWW) + ) + matched_files.update( + self._select_test_type_if_matching(candidate_test_types, SelectiveUnitTestTypes.PROVIDERS) + ) + matched_files.update( + self._select_test_type_if_matching(candidate_test_types, SelectiveUnitTestTypes.CLI) + ) + matched_files.update( + self._select_test_type_if_matching(candidate_test_types, SelectiveUnitTestTypes.API) + ) + + kubernetes_files = self._matching_files(FileGroupForCi.KUBERNETES_FILES, CI_FILE_GROUP_MATCHES) + all_source_files = self._matching_files(FileGroupForCi.ALL_SOURCE_FILES, CI_FILE_GROUP_MATCHES) + + remaining_files = set(all_source_files) - set(matched_files) - set(kubernetes_files) + count_remaining_files = len(remaining_files) + if count_remaining_files > 0: + get_console().print( + f"[warning]We should run all tests. There are {count_remaining_files} changed " + "files that seems to fall into Core/Other category[/]" + ) + get_console().print(remaining_files) + candidate_test_types.update(all_selective_test_types()) + else: + get_console().print( + "[warning]There are no core/other files. 
Only tests relevant to the changed files are run.[/]" + ) + sorted_candidate_test_types = list(sorted(candidate_test_types)) + get_console().print("[warning]Selected test type candidates to run:[/]") + get_console().print(sorted_candidate_test_types) + return sorted_candidate_test_types + + @cached_property + def test_types(self) -> str: + if not self.run_tests: + return "" + if self._run_everything: + current_test_types = list(all_selective_test_types()) + else: + current_test_types = self._get_test_types_to_run() + if self._default_branch != "main": + if "Providers" in current_test_types: + get_console().print( + "[warning]Removing 'Providers' because the target branch " + f"is {self._default_branch} and not main[/]" + ) + current_test_types.remove("Providers") + return " ".join(sorted(current_test_types)) + + @cached_property + def basic_checks_only(self) -> bool: + return not self.image_build + + @cached_property + def upgrade_to_newer_dependencies(self) -> bool: + return len( + self._matching_files(FileGroupForCi.SETUP_FILES, CI_FILE_GROUP_MATCHES) + ) > 0 or self._github_event in [GithubEvents.PUSH, GithubEvents.SCHEDULE] diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py new file mode 100644 index 0000000000000..492135ebd38b0 --- /dev/null +++ b/dev/breeze/tests/test_selective_checks.py @@ -0,0 +1,464 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Dict, Tuple + +import pytest + +from airflow_breeze.global_constants import GithubEvents +from airflow_breeze.utils.selective_checks import SelectiveChecks + + +def assert_outputs_are_printed(expected_outputs: Dict[str, str], output: str): + for name, value in expected_outputs.items(): + assert f"::set-output name={name}::{value}" in output + + +@pytest.mark.parametrize( + "files, expected_outputs,", + [ + ( + pytest.param( + ("INTHEWILD.md",), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "false", + "needs-helm-tests": "false", + "run-tests": "false", + "docs-build": "false", + "upgrade-to-newer-dependencies": "false", + "test-types": "", + }, + id="No tests on simple change", + ) + ), + ( + pytest.param( + ("airflow/api/file.py",), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "true", + "needs-helm-tests": "false", + "run-tests": "true", + "docs-build": "true", + "upgrade-to-newer-dependencies": "false", + "test-types": "API Always", + }, + id="Only API tests and DOCS should run", + ) + ), + ( + pytest.param( + ( + "airflow/api/file.py", + "tests/providers/google/file.py", + ), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "true", + "needs-helm-tests": "false", + "run-tests": "true", + "docs-build": "true", + "upgrade-to-newer-dependencies": "false", + "test-types": "API Always Providers", + }, + id="API and providers tests and docs should run", + ) + ), + ( + pytest.param( + ("tests/providers/google/file.py",), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "true", + "needs-helm-tests": "false", + "run-tests": "true", + "docs-build": "false", + "run-kubernetes-tests": "false", + "upgrade-to-newer-dependencies": "false", + "test-types": "Always Providers", + }, + id="Providers and docs should run", + ) + ), + ( + pytest.param( + ("docs/file.rst",), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "true", + "needs-helm-tests": "false", + "run-tests": "false", + "docs-build": "true", + "run-kubernetes-tests": "false", + "upgrade-to-newer-dependencies": "false", + "test-types": "", + }, + id="Only docs builds should run - no tests needed", + ) + ), + ( + pytest.param( + ( + "chart/aaaa.txt", + "tests/providers/google/file.py", + ), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "true", + "needs-helm-tests": "true", + "run-tests": "true", + "docs-build": "true", + "run-kubernetes-tests": "true", + "upgrade-to-newer-dependencies": "false", + "test-types": "Always Providers", + }, + id="Helm tests, providers, kubernetes tests and docs should run", + ) + ), + ( + pytest.param( + ( + "INTHEWILD.md", + "chart/aaaa.txt", + "tests/providers/google/file.py", + ), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "true", + "needs-helm-tests": "true", + "run-tests": "true", + "docs-build": "true", + "run-kubernetes-tests": "true", + "upgrade-to-newer-dependencies": "false", + "test-types": "Always Providers", + }, + id="Helm tests, providers, kubernetes tests and docs should run even if " + "unimportant files were added", + ) + ), + ( + pytest.param( + ("setup.py",), + { + "all-python-versions": "['3.7', '3.8', '3.9', '3.10']", + "all-python-versions-list-as-string": "3.7 3.8 3.9 3.10", + 
"image-build": "true", + "needs-helm-tests": "true", + "run-tests": "true", + "docs-build": "true", + "upgrade-to-newer-dependencies": "true", + "test-types": "API Always CLI Core Integration Other Providers WWW", + }, + id="Everything should run and upgrading to newer requirements as setup.py changed", + ) + ), + ], +) +def test_expected_output_pull_request_main( + files: Tuple[str, ...], + expected_outputs: Dict[str, str], +): + sc = SelectiveChecks( + files=files, + commit_ref="HEAD", + github_event=GithubEvents.PULL_REQUEST, + pr_labels=(), + default_branch="main", + ) + assert_outputs_are_printed(expected_outputs, str(sc)) + + +@pytest.mark.parametrize( + "files, pr_labels, default_branch, expected_outputs,", + [ + ( + pytest.param( + ("INTHEWILD.md",), + ("full tests needed",), + "main", + { + "all-python-versions": "['3.7', '3.8', '3.9', '3.10']", + "all-python-versions-list-as-string": "3.7 3.8 3.9 3.10", + "image-build": "true", + "run-tests": "true", + "docs-build": "true", + "upgrade-to-newer-dependencies": "false", + "test-types": "API Always CLI Core Integration Other Providers WWW", + }, + id="Everything should run when full tests are needed", + ) + ), + ( + pytest.param( + ("INTHEWILD.md",), + ( + "another label", + "full tests needed", + ), + "main", + { + "all-python-versions": "['3.7', '3.8', '3.9', '3.10']", + "all-python-versions-list-as-string": "3.7 3.8 3.9 3.10", + "image-build": "true", + "run-tests": "true", + "docs-build": "true", + "upgrade-to-newer-dependencies": "false", + "test-types": "API Always CLI Core Integration Other Providers WWW", + }, + id="Everything should run when full tests are needed even with different label set as well", + ) + ), + ( + pytest.param( + (), + ("full tests needed",), + "main", + { + "all-python-versions": "['3.7', '3.8', '3.9', '3.10']", + "all-python-versions-list-as-string": "3.7 3.8 3.9 3.10", + "image-build": "true", + "run-tests": "true", + "docs-build": "true", + "upgrade-to-newer-dependencies": "false", + "test-types": "API Always CLI Core Integration Other Providers WWW", + }, + id="Everything should run when full tests are needed even if no files are changed", + ) + ), + ( + pytest.param( + ("INTHEWILD.md",), + ("full tests needed",), + "v2-3-stable", + { + "all-python-versions": "['3.7', '3.8', '3.9', '3.10']", + "all-python-versions-list-as-string": "3.7 3.8 3.9 3.10", + "image-build": "true", + "run-tests": "true", + "docs-build": "true", + "upgrade-to-newer-dependencies": "false", + "test-types": "API Always CLI Core Integration Other WWW", + }, + id="Everything should run except Providers when full tests are needed for non-main branch", + ) + ), + ], +) +def test_expected_output_full_tests_needed( + files: Tuple[str, ...], + pr_labels: Tuple[str, ...], + default_branch: str, + expected_outputs: Dict[str, str], +): + sc = SelectiveChecks( + files=files, + commit_ref="HEAD", + github_event=GithubEvents.PULL_REQUEST, + pr_labels=pr_labels, + default_branch=default_branch, + ) + output = str(sc) + assert_outputs_are_printed(expected_outputs, output) + + +@pytest.mark.parametrize( + "files, expected_outputs,", + [ + pytest.param( + ("INTHEWILD.md",), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "false", + "needs-helm-tests": "false", + "run-tests": "false", + "docs-build": "false", + "upgrade-to-newer-dependencies": "false", + "test-types": "", + }, + id="Everything should run when full tests are needed even if no files are changed", + ), + pytest.param( + ( + 
"chart/aaaa.txt", + "tests/providers/google/file.py", + ), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "needs-helm-tests": "false", + "image-build": "true", + "run-tests": "true", + "docs-build": "true", + "run-kubernetes-tests": "true", + "upgrade-to-newer-dependencies": "false", + "test-types": "Always", + }, + id="No Helm tests, No providers should run if only chart/providers changed in non-main", + ), + pytest.param( + ( + "airflow/cli/test.py", + "chart/aaaa.txt", + "tests/providers/google/file.py", + ), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "true", + "needs-helm-tests": "false", + "run-tests": "true", + "docs-build": "true", + "run-kubernetes-tests": "true", + "upgrade-to-newer-dependencies": "false", + "test-types": "Always CLI", + }, + id="Only CLI tests and Kubernetes tests should run if cli/chart files changed in non-main branch", + ), + pytest.param( + ( + "airflow/file.py", + "tests/providers/google/file.py", + ), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "true", + "needs-helm-tests": "false", + "run-tests": "true", + "docs-build": "true", + "run-kubernetes-tests": "false", + "upgrade-to-newer-dependencies": "false", + "test-types": "API Always CLI Core Integration Other WWW", + }, + id="All tests except providers should run if core file changed in non-main branch", + ), + ], +) +def test_expected_output_pull_request_v2_3( + files: Tuple[str, ...], + expected_outputs: Dict[str, str], +): + sc = SelectiveChecks( + files=files, + commit_ref="HEAD", + github_event=GithubEvents.PULL_REQUEST, + pr_labels=(), + default_branch="v2-3-stable", + ) + assert_outputs_are_printed(expected_outputs, str(sc)) + + +@pytest.mark.parametrize( + "files, pr_labels, default_branch, expected_outputs,", + [ + pytest.param( + ("INTHEWILD.md",), + (), + "main", + { + "all-python-versions": "['3.7', '3.8', '3.9', '3.10']", + "all-python-versions-list-as-string": "3.7 3.8 3.9 3.10", + "image-build": "true", + "needs-helm-tests": "true", + "run-tests": "true", + "docs-build": "true", + "upgrade-to-newer-dependencies": "true", + "test-types": "API Always CLI Core Integration Other Providers WWW", + }, + id="All tests run on push even if unimportant file changed", + ), + pytest.param( + ("INTHEWILD.md",), + (), + "v2-3-stable", + { + "all-python-versions": "['3.7', '3.8', '3.9', '3.10']", + "all-python-versions-list-as-string": "3.7 3.8 3.9 3.10", + "image-build": "true", + "needs-helm-tests": "false", + "run-tests": "true", + "docs-build": "true", + "upgrade-to-newer-dependencies": "true", + "test-types": "API Always CLI Core Integration Other WWW", + }, + id="All tests except Providers and Helm run on push" + " even if unimportant file changed in non-main branch", + ), + pytest.param( + ("airflow/api.py",), + (), + "main", + { + "all-python-versions": "['3.7', '3.8', '3.9', '3.10']", + "all-python-versions-list-as-string": "3.7 3.8 3.9 3.10", + "image-build": "true", + "needs-helm-tests": "true", + "run-tests": "true", + "docs-build": "true", + "upgrade-to-newer-dependencies": "true", + "test-types": "API Always CLI Core Integration Other Providers WWW", + }, + id="All tests run on push if core file changed", + ), + ], +) +def test_expected_output_push( + files: Tuple[str, ...], + pr_labels: Tuple[str, ...], + default_branch: str, + expected_outputs: Dict[str, str], +): + sc = SelectiveChecks( + files=files, + commit_ref="HEAD", + 
github_event=GithubEvents.PUSH, + pr_labels=pr_labels, + default_branch=default_branch, + ) + assert_outputs_are_printed(expected_outputs, str(sc)) + + +def test_no_commit_provided(): + sc = SelectiveChecks( + files=(), + commit_ref="", + github_event=GithubEvents.PULL_REQUEST, + pr_labels=(), + default_branch="main", + ) + assert_outputs_are_printed( + { + "all-python-versions": "['3.7', '3.8', '3.9', '3.10']", + "all-python-versions-list-as-string": "3.7 3.8 3.9 3.10", + "image-build": "true", + "needs-helm-tests": "true", + "run-tests": "true", + "docs-build": "true", + "upgrade-to-newer-dependencies": "false", + "test-types": "API Always CLI Core Integration Other Providers WWW", + }, + str(sc), + ) diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt index 0350c181a1dc3..52b984dfefadc 100644 --- a/images/breeze/output-commands-hash.txt +++ b/images/breeze/output-commands-hash.txt @@ -2,4 +2,4 @@ # This file is automatically generated by pre-commit. If you have a conflict with this file # Please do not solve it but run `breeze regenerate-command-images`. # This command should fix the conflict and regenerate help images that you have conflict with. -2e5e83c884f0fe51f91f573fcbff12ba +088f01c27036e2230099add63afb5f6e diff --git a/images/breeze/output-commands.svg b/images/breeze/output-commands.svg index e9dd89b22e4dc..2e21ac9804afa 100644 --- a/images/breeze/output-commands.svg +++ b/images/breeze/output-commands.svg @@ -1,4 +1,4 @@ - + - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + + + + + + + - Breeze commands + Breeze commands - + - - -Usage: breeze [OPTIONS] COMMAND [ARGS]... - -╭─ Basic flags for the default (shell) command ────────────────────────────────────────────────────────────────────────╮ ---python-pPython major/minor version used in Airflow image for images.(>3.7< | 3.8 | 3.9 | 3.10) -[default: 3.7]                                               ---backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite] ---postgres-version-PVersion of Postgres used.(>10< | 11 | 12 | 13 | 14)[default: 10] ---mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7] ---mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest] ---integrationIntegration(s) to enable when running (can be more than one).                             -(cassandra | kerberos | mongo | openldap | pinot | rabbitmq | redis | statsd | trino |    -all)                                                                                      ---forward-credentials-fForward local credentials to container when running. ---db-reset-dReset DB when entering the container. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Advanced flags for the default (shell) command ─────────────────────────────────────────────────────────────────────╮ ---use-airflow-versionUse (reinstall at entry) Airflow version from PyPI. It can also be `none`, `wheel`, or   -`sdist` if Airflow should be removed, installed from wheel packages or sdist packages    -available in dist folder respectively. Implies --mount-sources `remove`.                 
-(none | wheel | sdist | <airflow_version>)                                               ---airflow-extrasAirflow extras to install when --use-airflow-version is used(TEXT) ---use-packages-from-distInstall all found packages (--package-format determines type) from 'dist' folder when    -entering breeze.                                                                         ---package-formatFormat of packages that should be installed from dist.(wheel | sdist)[default: wheel] ---force-buildForce image build no matter if it is determined as needed. ---mount-sourcesChoose scope of local sources that should be mounted, skipped, or removed (default =     -selected).                                                                               -(selected | all | skip | remove)                                                         -[default: selected]                                                                      ---debian-versionDebian version used for the image.(bullseye | buster)[default: bullseye] -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] ---answer-aForce answer to questions.(y | n | q | yes | no | quit) ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Developer tools ────────────────────────────────────────────────────────────────────────────────────────────────────╮ -shell          Enter breeze.py environment. this is the default command use when no other is selected.             -start-airflow  Enter breeze.py environment and starts all Airflow components in the tmux session.                  -exec           Joins the interactive shell of running airflow container                                            -stop           Stop running breeze environment.                                                                    -build-docs     Build documentation in the container.                                                               -static-checks  Run static checks.                                                                                  -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Testing ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -docker-compose-tests Run docker-compose tests.                                                                     -tests                Run the specified unit test targets. Multiple targets may be specified separated by spaces.   -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Configuration & maintenance ────────────────────────────────────────────────────────────────────────────────────────╮ -cleanup                  Cleans the cache of parameters, docker cache and optionally - currently downloaded        -images.                                                                                   -self-upgrade             Self upgrade Breeze.                                        
                              -setup-autocomplete       Enables autocompletion of breeze commands.                                                -config                   Show/update configuration (Python, Backend, Cheatsheet, ASCIIART).                        -resource-check           Check if available docker resources are enough.                                           -free-space               Free space for jobs run in CI.                                                            -fix-ownership            Fix ownership of source files to be same as host user.                                    -regenerate-command-imagesRegenerate breeze command images.                                                         -command-hash-export      Outputs hash of all click commands to file or stdout if `-` is used (useful to see if     -images should be regenerated).                                                            -version                  Print information about version of apache-airflow-breeze.                                 -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ CI Image tools ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ -build-image   Build CI image. Include building multiple images for all python versions (sequentially).             -pull-image    Pull and optionally verify CI images - possibly in parallel for all Python versions.                 -verify-image  Verify CI image.                                                                                     -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Production Image tools ─────────────────────────────────────────────────────────────────────────────────────────────╮ -build-prod-image Build Production image. Include building multiple images for all or selected Python versions      -sequentially.                                                                                     -pull-prod-image  Pull and optionally verify Production images - possibly in parallel for all Python versions.      -verify-prod-imageVerify Production image.                                                                          -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Release management ─────────────────────────────────────────────────────────────────────────────────────────────────╮ -verify-provider-packages         Verifies if all provider code is following expectations for providers.            -prepare-provider-documentation   Prepare CHANGELOG, README and COMMITS information for providers.                  -prepare-provider-packages        Prepare sdist/whl packages of Airflow Providers.                                  -prepare-airflow-package          Prepare sdist/whl package of Airflow.                                             -release-prod-images              Release production images to DockerHub (needs DockerHub permissions).             -generate-constraints             Generates pinned constraint files with all extras from setup.py in parallel.      -find-newer-dependencies          Finds which dependencies are being upgraded.                                      
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + + +Usage: breeze [OPTIONS] COMMAND [ARGS]... + +╭─ Basic flags for the default (shell) command ────────────────────────────────────────────────────────────────────────╮ +--python-pPython major/minor version used in Airflow image for images.(>3.7< | 3.8 | 3.9 | 3.10) +[default: 3.7]                                               +--backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite] +--postgres-version-PVersion of Postgres used.(>10< | 11 | 12 | 13 | 14)[default: 10] +--mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7] +--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest] +--integrationIntegration(s) to enable when running (can be more than one).                             +(cassandra | kerberos | mongo | openldap | pinot | rabbitmq | redis | statsd | trino |    +all)                                                                                      +--forward-credentials-fForward local credentials to container when running. +--db-reset-dReset DB when entering the container. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Advanced flags for the default (shell) command ─────────────────────────────────────────────────────────────────────╮ +--use-airflow-versionUse (reinstall at entry) Airflow version from PyPI. It can also be `none`, `wheel`, or   +`sdist` if Airflow should be removed, installed from wheel packages or sdist packages    +available in dist folder respectively. Implies --mount-sources `remove`.                 +(none | wheel | sdist | <airflow_version>)                                               +--airflow-extrasAirflow extras to install when --use-airflow-version is used(TEXT) +--use-packages-from-distInstall all found packages (--package-format determines type) from 'dist' folder when    +entering breeze.                                                                         +--package-formatFormat of packages that should be installed from dist.(wheel | sdist)[default: wheel] +--force-buildForce image build no matter if it is determined as needed. +--mount-sourcesChoose scope of local sources that should be mounted, skipped, or removed (default =     +selected).                                                                               +(selected | all | skip | remove)                                                         +[default: selected]                                                                      +--debian-versionDebian version used for the image.(bullseye | buster)[default: bullseye] +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +--answer-aForce answer to questions.(y | n | q | yes | no | quit) +--help-hShow this message and exit. 
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Developer tools ────────────────────────────────────────────────────────────────────────────────────────────────────╮ +shell          Enter breeze.py environment. this is the default command use when no other is selected.             +start-airflow  Enter breeze.py environment and starts all Airflow components in the tmux session.                  +exec           Joins the interactive shell of running airflow container                                            +stop           Stop running breeze environment.                                                                    +build-docs     Build documentation in the container.                                                               +static-checks  Run static checks.                                                                                  +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Testing ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +docker-compose-tests Run docker-compose tests.                                                                     +tests                Run the specified unit test targets. Multiple targets may be specified separated by spaces.   +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Configuration & maintenance ────────────────────────────────────────────────────────────────────────────────────────╮ +cleanup                  Cleans the cache of parameters, docker cache and optionally - currently downloaded        +images.                                                                                   +self-upgrade             Self upgrade Breeze.                                                                      +setup-autocomplete       Enables autocompletion of breeze commands.                                                +config                   Show/update configuration (Python, Backend, Cheatsheet, ASCIIART).                        +regenerate-command-imagesRegenerate breeze command images.                                                         +command-hash-export      Outputs hash of all click commands to file or stdout if `-` is used (useful to see if     +images should be regenerated).                                                            +version                  Print information about version of apache-airflow-breeze.                                 +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ CI Image tools ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +build-image   Build CI image. Include building multiple images for all python versions (sequentially).             +pull-image    Pull and optionally verify CI images - possibly in parallel for all Python versions.                 +verify-image  Verify CI image.                                                                                     +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Production Image tools ─────────────────────────────────────────────────────────────────────────────────────────────╮ +build-prod-image Build Production image. 
Include building multiple images for all or selected Python versions      +sequentially.                                                                                     +pull-prod-image  Pull and optionally verify Production images - possibly in parallel for all Python versions.      +verify-prod-imageVerify Production image.                                                                          +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ CI commands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +fix-ownership                  Fix ownership of source files to be same as host user.                              +free-space                     Free space for jobs run in CI.                                                      +resource-check                 Check if available docker resources are enough.                                     +selective-check                Checks what kind of tests should be run for an incoming commit.                     +find-newer-dependencies        Finds which dependencies are being upgraded.                                        +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Release management ─────────────────────────────────────────────────────────────────────────────────────────────────╮ +verify-provider-packages         Verifies if all provider code is following expectations for providers.            +prepare-provider-documentation   Prepare CHANGELOG, README and COMMITS information for providers.                  +prepare-provider-packages        Prepare sdist/whl packages of Airflow Providers.                                  +prepare-airflow-package          Prepare sdist/whl package of Airflow.                                             +release-prod-images              Release production images to DockerHub (needs DockerHub permissions).             +generate-constraints             Generates pinned constraint files with all extras from setup.py in parallel.      +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output-selective-check.svg b/images/breeze/output-selective-check.svg new file mode 100644 index 0000000000000..3ea08a5bf7a29 --- /dev/null +++ b/images/breeze/output-selective-check.svg @@ -0,0 +1,132 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Command: selective-check + + + + + + + + + + +Usage: breeze selective-check [OPTIONS] + +Checks what kind of tests should be run for an incoming commit. 
+ +╭─ Selective check flags ──────────────────────────────────────────────────────────────────────────────────────────────╮ +--commit-refCommit-ish reference to the commit that should be checked(TEXT) +--pr-labelsSpace-separate list of labels which are valid for the PR(TEXT) +--default-branchBranch against which the PR should be run(TEXT)[default: main] +--github-event-nameName of the GitHub event that triggered the check                            +(pull_request | pull_request_review | pull_request_target | push | schedule) +[default: pull_request]                                                      +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + + + + diff --git a/images/breeze/output-tests.svg b/images/breeze/output-tests.svg index 1d819816f2596..0bb6680bef9ae 100644 --- a/images/breeze/output-tests.svg +++ b/images/breeze/output-tests.svg @@ -19,177 +19,177 @@ font-weight: 700; } - .terminal-3301140235-matrix { + .terminal-2980146706-matrix { font-family: Fira Code, monospace; font-size: 20px; line-height: 24.4px; font-variant-east-asian: full-width; } - .terminal-3301140235-title { + .terminal-2980146706-title { font-size: 18px; font-weight: bold; font-family: arial; } - .terminal-3301140235-r1 { fill: #c5c8c6;font-weight: bold } -.terminal-3301140235-r2 { fill: #c5c8c6 } -.terminal-3301140235-r3 { fill: #d0b344;font-weight: bold } -.terminal-3301140235-r4 { fill: #868887 } -.terminal-3301140235-r5 { fill: #68a0b3;font-weight: bold } -.terminal-3301140235-r6 { fill: #8d7b39 } -.terminal-3301140235-r7 { fill: #98a84b;font-weight: bold } + .terminal-2980146706-r1 { fill: #c5c8c6;font-weight: bold } +.terminal-2980146706-r2 { fill: #c5c8c6 } +.terminal-2980146706-r3 { fill: #d0b344;font-weight: bold } +.terminal-2980146706-r4 { fill: #868887 } +.terminal-2980146706-r5 { fill: #68a0b3;font-weight: bold } +.terminal-2980146706-r6 { fill: #8d7b39 } +.terminal-2980146706-r7 { fill: #98a84b;font-weight: bold } - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - Command: tests + Command: tests - + - - -Usage: breeze tests [OPTIONS] [EXTRA_PYTEST_ARGS]... - -Run the specified unit test targets. Multiple targets may be specified separated by spaces. - -╭─ Basic flag for tests command ───────────────────────────────────────────────────────────────────────────────────────╮ ---integrationIntegration(s) to enable when running (can be more than one).                               -(cassandra | kerberos | mongo | openldap | pinot | rabbitmq | redis | statsd | trino | all) ---test-typeType of test to run.                                                                         -(All | Always | Core | Providers | API | CLI | Integration | Other | WWW | Postgres | MySQL  -| Helm | Quarantined)                                                                        ---db-reset-dReset DB when entering the container. 
---backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite] ---python-pPython major/minor version used in Airflow image for images.(>3.7< | 3.8 | 3.9 | 3.10) -[default: 3.7]                                               ---postgres-version-PVersion of Postgres used.(>10< | 11 | 12 | 13 | 14)[default: 10] ---mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7] ---mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest] -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Advanced flag for tests command ────────────────────────────────────────────────────────────────────────────────────╮ ---limit-progress-outputLimit progress to percentage only and just show the summary when tests complete. ---image-tag-tTag of the image which is used to pull or run the image (implies --mount-sources=skip   -when using to run shell or tests)                                                       -(TEXT)                                                                                  ---mount-sourcesChoose scope of local sources that should be mounted, skipped, or removed (default =    -selected).                                                                              -(selected | all | skip | remove)                                                        -[default: selected]                                                                     -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---verbose-vPrint verbose information about performed steps. ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + + +Usage: breeze tests [OPTIONS] [EXTRA_PYTEST_ARGS]... + +Run the specified unit test targets. Multiple targets may be specified separated by spaces. + +╭─ Basic flag for tests command ───────────────────────────────────────────────────────────────────────────────────────╮ +--integrationIntegration(s) to enable when running (can be more than one).                               +(cassandra | kerberos | mongo | openldap | pinot | rabbitmq | redis | statsd | trino | all) +--test-typeType of test to run.                                                                         +(All | Always | API | Always | CLI | Core | Integration | Other | Providers | WWW | Helm |   +Postgres | MySQL | Integration | Other | Quarantine)                                         +--db-reset-dReset DB when entering the container. 
+--backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite] +--python-pPython major/minor version used in Airflow image for images.(>3.7< | 3.8 | 3.9 | 3.10) +[default: 3.7]                                               +--postgres-version-PVersion of Postgres used.(>10< | 11 | 12 | 13 | 14)[default: 10] +--mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7] +--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest] +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Advanced flag for tests command ────────────────────────────────────────────────────────────────────────────────────╮ +--limit-progress-outputLimit progress to percentage only and just show the summary when tests complete. +--image-tag-tTag of the image which is used to pull or run the image (implies --mount-sources=skip   +when using to run shell or tests)                                                       +(TEXT)                                                                                  +--mount-sourcesChoose scope of local sources that should be mounted, skipped, or removed (default =    +selected).                                                                              +(selected | all | skip | remove)                                                        +[default: selected]                                                                     +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--verbose-vPrint verbose information about performed steps. +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/scripts/ci/selective_ci_checks.sh b/scripts/ci/selective_ci_checks.sh deleted file mode 100755 index 26fbf13f66ed9..0000000000000 --- a/scripts/ci/selective_ci_checks.sh +++ /dev/null @@ -1,768 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# shellcheck source=scripts/ci/libraries/_script_init.sh -. ./scripts/ci/libraries/_script_init.sh - -# Parameter: -# -# $1 - COMMIT SHA of the incoming commit. If this parameter is missing, this script does not check anything, -# it simply sets all the version outputs that determine that all tests should be run. -# This happens in case the even triggering the workflow is 'schedule' or 'push'. 
-# -# The logic of retrieving changes works by comparing the incoming commit with the target branch -# The commit addresses. -# -# -declare -a pattern_array - -if [[ ${PR_LABELS=} == *"full tests needed"* ]]; then - echo - echo "Found the right PR labels in '${PR_LABELS=}': 'full tests needed''" - echo - FULL_TESTS_NEEDED_LABEL="true" -else - echo - echo "Did not find the right PR labels in '${PR_LABELS=}': 'full tests needed'" - echo - FULL_TESTS_NEEDED_LABEL="false" -fi - -function check_upgrade_to_newer_dependencies_needed() { - if [[ ${GITHUB_EVENT_NAME=} == 'push' || ${GITHUB_EVENT_NAME=} == "scheduled" ]]; then - # Trigger upgrading to latest constraints when we are in push or schedule event - upgrade_to_newer_dependencies="true" - fi -} - -function output_all_basic_variables() { - if [[ ${FULL_TESTS_NEEDED_LABEL} == "true" ]]; then - initialization::ga_output python-versions \ - "$(initialization::parameters_to_json "${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS[@]}")" - initialization::ga_output all-python-versions \ - "$(initialization::parameters_to_json "${ALL_PYTHON_MAJOR_MINOR_VERSIONS[@]}")" - initialization::ga_output all-python-versions-list-as-string "${ALL_PYTHON_MAJOR_MINOR_VERSIONS[*]}" - initialization::ga_output python-versions-list-as-string "${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS[*]}" - initialization::ga_output kubernetes-versions-list-as-string "${CURRENT_KUBERNETES_VERSIONS[*]}" - else - initialization::ga_output python-versions \ - "$(initialization::parameters_to_json "${DEFAULT_PYTHON_MAJOR_MINOR_VERSION}")" - # this will work as long as DEFAULT_PYTHON_MAJOR_VERSION is the same on HEAD - # all-python-versions are used in BuildImage Workflow - initialization::ga_output all-python-versions \ - "$(initialization::parameters_to_json "${DEFAULT_PYTHON_MAJOR_MINOR_VERSION}")" - initialization::ga_output all-python-versions-list-as-string "${DEFAULT_PYTHON_MAJOR_MINOR_VERSION}" - initialization::ga_output python-versions-list-as-string "${DEFAULT_PYTHON_MAJOR_MINOR_VERSION}" - initialization::ga_output kubernetes-versions-list-as-string "${DEFAULT_KUBERNETES_VERSION}" - fi - initialization::ga_output default-python-version "${DEFAULT_PYTHON_MAJOR_MINOR_VERSION}" - - if [[ ${FULL_TESTS_NEEDED_LABEL} == "true" ]]; then - initialization::ga_output kubernetes-versions \ - "$(initialization::parameters_to_json "${CURRENT_KUBERNETES_VERSIONS[@]}")" - else - initialization::ga_output kubernetes-versions \ - "$(initialization::parameters_to_json "${KUBERNETES_VERSION}")" - fi - initialization::ga_output default-kubernetes-version "${KUBERNETES_VERSION}" - - initialization::ga_output kubernetes-modes \ - "$(initialization::parameters_to_json "${CURRENT_KUBERNETES_MODES[@]}")" - initialization::ga_output default-kubernetes-mode "${KUBERNETES_MODE}" - - if [[ ${FULL_TESTS_NEEDED_LABEL} == "true" ]]; then - initialization::ga_output postgres-versions \ - "$(initialization::parameters_to_json "${CURRENT_POSTGRES_VERSIONS[@]}")" - else - initialization::ga_output postgres-versions \ - "$(initialization::parameters_to_json "${POSTGRES_VERSION}")" - fi - initialization::ga_output default-postgres-version "${POSTGRES_VERSION}" - - if [[ ${FULL_TESTS_NEEDED_LABEL} == "true" ]]; then - initialization::ga_output mysql-versions \ - "$(initialization::parameters_to_json "${CURRENT_MYSQL_VERSIONS[@]}")" - else - initialization::ga_output mysql-versions \ - "$(initialization::parameters_to_json "${MYSQL_VERSION}")" - fi - initialization::ga_output default-mysql-version "${MYSQL_VERSION}" - - if [[ 
${FULL_TESTS_NEEDED_LABEL} == "true" ]]; then - initialization::ga_output mssql-versions \ - "$(initialization::parameters_to_json "${CURRENT_MSSQL_VERSIONS[@]}")" - else - initialization::ga_output mssql-versions \ - "$(initialization::parameters_to_json "${MSSQL_VERSION}")" - fi - initialization::ga_output default-mssql-version "${MSSQL_VERSION}" - - - - initialization::ga_output kind-versions \ - "$(initialization::parameters_to_json "${CURRENT_KIND_VERSIONS[@]}")" - initialization::ga_output default-kind-version "${KIND_VERSION}" - - initialization::ga_output helm-versions \ - "$(initialization::parameters_to_json "${CURRENT_HELM_VERSIONS[@]}")" - initialization::ga_output default-helm-version "${HELM_VERSION}" - - if [[ ${FULL_TESTS_NEEDED_LABEL} == "true" ]]; then - initialization::ga_output postgres-exclude '[{ "python-version": "3.7" }]' - initialization::ga_output mssql-exclude '[{ "python-version": "3.8" }]' - initialization::ga_output mysql-exclude '[{ "python-version": "3.10" }]' - initialization::ga_output sqlite-exclude '[{ "python-version": "3.9" }]' - else - initialization::ga_output postgres-exclude '[]' - initialization::ga_output mysql-exclude '[]' - initialization::ga_output mssql-exclude '[]' - initialization::ga_output sqlite-exclude '[]' - fi - - - initialization::ga_output default-helm-version "${HELM_VERSION}" - initialization::ga_output kubernetes-exclude '[]' - - initialization::ga_output default-branch "${DEFAULT_BRANCH}" - -} - -function get_changed_files() { - start_end::group_start "Get changed files" - echo - echo "Incoming commit SHA: ${INCOMING_COMMIT_SHA}" - echo - echo "Changed files from ${INCOMING_COMMIT_SHA} vs it's first parent" - echo - CHANGED_FILES=$(git diff-tree --no-commit-id --name-only \ - -r "${INCOMING_COMMIT_SHA}^" "${INCOMING_COMMIT_SHA}" || true) - if [[ -z "${CHANGED_FILES}" ]]; then - echo - echo "${COLOR_YELLOW}WARNING: Could not find any changed files ${COLOR_RESET}" - echo Assuming that we should run all tests in this case - echo - set_outputs_run_everything_and_exit - fi - echo - echo "Changed files:" - echo - echo "${CHANGED_FILES}" - echo - readonly CHANGED_FILES - start_end::group_end -} - -function run_tests() { - initialization::ga_output run-tests "${@}" -} - -function run_kubernetes_tests() { - initialization::ga_output run-kubernetes-tests "${@}" -} - -function needs_helm_tests() { - initialization::ga_output needs-helm-tests "${@}" -} - -function needs_api_tests() { - initialization::ga_output needs-api-tests "${@}" -} - -function needs_api_codegen() { - initialization::ga_output needs-api-codegen "${@}" -} - -function needs_javascript_scans() { - initialization::ga_output needs-javascript-scans "${@}" -} - -function needs_python_scans() { - initialization::ga_output needs-python-scans "${@}" -} - -function set_test_types() { - initialization::ga_output test-types "${@}" -} - -function set_docs_build() { - initialization::ga_output docs-build "${@}" -} - -function set_image_build() { - initialization::ga_output image-build "${@}" -} - -function set_basic_checks_only() { - initialization::ga_output basic-checks-only "${@}" -} - -function set_upgrade_to_newer_dependencies() { - initialization::ga_output upgrade-to-newer-dependencies "${@}" -} - -function needs_ui_tests() { - initialization::ga_output run-ui-tests "${@}" -} - -function needs_www_tests() { - initialization::ga_output run-www-tests "${@}" -} - -if [[ ${DEFAULT_BRANCH} == "main" ]]; then - ALL_TESTS="Always API Core Other CLI Providers WWW Integration" -else - # 
Skips Provider tests in case current default branch is not main - ALL_TESTS="Always API Core Other CLI WWW Integration" -fi -readonly ALL_TESTS - -function set_outputs_run_everything_and_exit() { - needs_api_tests "true" - needs_api_codegen "true" - needs_helm_tests "true" - needs_javascript_scans "true" - needs_python_scans "true" - run_tests "true" - run_kubernetes_tests "true" - set_test_types "${ALL_TESTS}" - set_basic_checks_only "false" - set_docs_build "true" - set_image_build "true" - set_upgrade_to_newer_dependencies "${upgrade_to_newer_dependencies}" - needs_ui_tests "true" - needs_www_tests "true" - exit -} - -function set_outputs_run_all_python_tests() { - run_tests "true" - run_kubernetes_tests "true" - set_test_types "${ALL_TESTS}" - set_basic_checks_only "false" - set_image_build "true" - kubernetes_tests_needed="true" -} - -function set_output_skip_all_tests_and_docs_and_exit() { - needs_api_tests "false" - needs_api_codegen "false" - needs_helm_tests "false" - needs_javascript_scans "false" - needs_python_scans "false" - run_tests "false" - run_kubernetes_tests "false" - set_test_types "" - set_basic_checks_only "true" - set_docs_build "false" - set_image_build "false" - set_upgrade_to_newer_dependencies "false" - needs_ui_tests "false" - needs_www_tests "false" - exit -} - -function set_output_skip_tests_but_build_images_and_exit() { - needs_api_tests "false" - needs_api_codegen "false" - needs_helm_tests "false" - needs_javascript_scans "false" - needs_python_scans "false" - run_tests "false" - run_kubernetes_tests "false" - set_test_types "" - set_basic_checks_only "false" - set_docs_build "true" - set_image_build "true" - set_upgrade_to_newer_dependencies "${upgrade_to_newer_dependencies}" - needs_ui_tests "false" - needs_www_tests "false" - exit -} - -# Converts array of patterns into single | pattern string -# pattern_array - array storing regexp patterns -# Outputs - pattern string -function get_regexp_from_patterns() { - local test_triggering_regexp="" - local separator="" - local pattern - for pattern in "${pattern_array[@]}"; do - test_triggering_regexp="${test_triggering_regexp}${separator}${pattern}" - separator="|" - done - echo "${test_triggering_regexp}" -} - -# Shows changed files in the commit vs. the target. -# Input: -# pattern_array - array storing regexp patterns -function show_changed_files() { - local the_regexp - the_regexp=$(get_regexp_from_patterns) - echo - echo "Changed files matching the ${the_regexp} pattern:" - echo - echo "${CHANGED_FILES}" | grep -E "${the_regexp}" || true - echo -} - -# Counts changed files in the commit vs. 
the target -# Input: -# pattern_array - array storing regexp patterns -# Output: -# Count of changed files matching the patterns -function count_changed_files() { - echo "${CHANGED_FILES}" | grep -c -E "$(get_regexp_from_patterns)" || true -} - -function check_if_python_security_scans_should_be_run() { - start_end::group_start "Check Python security scans" - local pattern_array=( - "^airflow/.*\.py" - "^setup.py" - ) - show_changed_files - - if [[ $(count_changed_files) == "0" ]]; then - needs_python_scans "false" - else - needs_python_scans "true" - fi - start_end::group_end -} - -function check_if_setup_files_changed() { - start_end::group_start "Check setup.py/cfg changed" - local pattern_array=( - "^setup.cfg" - "^setup.py" - ) - show_changed_files - - if [[ $(count_changed_files) != "0" ]]; then - # In case the setup files changed, we automatically force upgrading to newer dependencies - # no matter what was set before. - upgrade_to_newer_dependencies="true" - fi - start_end::group_end -} - - -function check_if_javascript_security_scans_should_be_run() { - start_end::group_start "Check JavaScript security scans" - local pattern_array=( - "^airflow/.*\.[jt]sx?" - "^airflow/.*\.lock" - ) - show_changed_files - - if [[ $(count_changed_files) == "0" ]]; then - needs_javascript_scans "false" - else - needs_javascript_scans "true" - fi - start_end::group_end -} - -function check_if_api_tests_should_be_run() { - start_end::group_start "Check API tests" - local pattern_array=( - "^airflow/api" - ) - show_changed_files - - if [[ $(count_changed_files) == "0" ]]; then - needs_api_tests "false" - else - needs_api_tests "true" - fi - start_end::group_end -} - -function check_if_api_codegen_should_be_run() { - start_end::group_start "Check API codegen" - local pattern_array=( - "^airflow/api_connexion/openapi/v1.yaml" - "^clients/gen" - ) - show_changed_files - - if [[ $(count_changed_files) == "0" ]]; then - needs_api_codegen "false" - else - needs_api_codegen "true" - fi - start_end::group_end -} - -function check_if_helm_tests_should_be_run() { - start_end::group_start "Check helm tests" - local pattern_array=( - "^chart" - ) - show_changed_files - - if [[ $(count_changed_files) == "0" ]]; then - needs_helm_tests "false" - else - needs_helm_tests "true" - fi - start_end::group_end -} - -function check_if_docs_should_be_generated() { - start_end::group_start "Check docs" - local pattern_array=( - "^docs" - "^airflow/.*\.py$" - "^CHANGELOG\.txt" - "^airflow/config_templates/config\.yml" - "^chart/UPDATING\.rst" - "^chart/CHANGELOG\.txt" - "^chart/values\.schema\.json" - ) - show_changed_files - - if [[ $(count_changed_files) == "0" ]]; then - echo "None of the docs changed" - else - image_build_needed="true" - docs_build_needed="true" - fi - start_end::group_end -} - -function check_if_ui_tests_should_be_run() { - start_end::group_start "Check UI" - local pattern_array=( - "^airflow/ui/.*\.[tj]sx?$" - # tsconfig.json, package.json, etc. - "^airflow/ui/[^/]+\.json$" - "^airflow/ui/.*\.lock$" - ) - show_changed_files - - if [[ $(count_changed_files) == "0" ]]; then - needs_ui_tests "false" - else - needs_ui_tests "true" - fi - start_end::group_end -} - -function check_if_www_tests_should_be_run() { - start_end::group_start "Check WWW" - local pattern_array=( - "^airflow/www/.*\.js[x]?$" - # tsconfig.json, package.json, etc. 
- "^airflow/www/[^/]+\.json$" - "^airflow/www/.*\.lock$" - ) - show_changed_files - - if [[ $(count_changed_files) == "0" ]]; then - needs_www_tests "false" - else - needs_www_tests "true" - fi - start_end::group_end -} - - -ANY_PY_FILES_CHANGED=( - "\.py$" -) -readonly ANY_PY_FILES_CHANGED - -function check_if_any_py_files_changed() { - start_end::group_start "Check if any Python files changed" - local pattern_array=("${ANY_PY_FILES_CHANGED[@]}") - show_changed_files - - if [[ $(count_changed_files) != "0" ]]; then - image_build_needed="true" - fi - start_end::group_end -} - - -AIRFLOW_SOURCES_TRIGGERING_TESTS=( - "^.pre-commit-config.yaml$" - "^airflow" - "^chart" - "^tests" - "^kubernetes_tests" -) -readonly AIRFLOW_SOURCES_TRIGGERING_TESTS - -function check_if_tests_are_needed_at_all() { - start_end::group_start "Check tests are needed" - local pattern_array=("${AIRFLOW_SOURCES_TRIGGERING_TESTS[@]}") - show_changed_files - - if [[ $(count_changed_files) == "0" ]]; then - if [[ ${image_build_needed} == "true" ]]; then - echo "No tests needed, Skipping tests but building images." - set_output_skip_tests_but_build_images_and_exit - else - echo "None of the important files changed, Skipping tests" - set_output_skip_all_tests_and_docs_and_exit - fi - else - image_build_needed="true" - tests_needed="true" - fi - start_end::group_end -} - -function run_all_tests_if_environment_files_changed() { - start_end::group_start "Check if everything should be run" - local pattern_array=( - "^.github/workflows/" - "^Dockerfile" - "^scripts" - "^setup.py" - "^setup.cfg" - ) - show_changed_files - - if [[ $(count_changed_files) != "0" ]]; then - echo "Important environment files changed. Running everything" - set_outputs_run_everything_and_exit - fi - if [[ ${FULL_TESTS_NEEDED_LABEL} == "true" ]]; then - echo "Full tests requested by label on PR. 
Running everything" - set_outputs_run_everything_and_exit - fi - start_end::group_end -} - -function get_count_all_files() { - start_end::group_start "Count all airflow source files" - local pattern_array=("${AIRFLOW_SOURCES_TRIGGERING_TESTS[@]}") - show_changed_files - COUNT_ALL_CHANGED_FILES=$(count_changed_files) - echo "Files count: ${COUNT_ALL_CHANGED_FILES}" - readonly COUNT_ALL_CHANGED_FILES - start_end::group_end -} - -function get_count_api_files() { - start_end::group_start "Count API files" - local pattern_array=( - "^airflow/api" - "^airflow/api_connexion" - "^tests/api" - "^tests/api_connexion" - ) - show_changed_files - COUNT_API_CHANGED_FILES=$(count_changed_files) - echo "Files count: ${COUNT_API_CHANGED_FILES}" - readonly COUNT_API_CHANGED_FILES - start_end::group_end -} - -function get_count_cli_files() { - start_end::group_start "Count CLI files" - local pattern_array=( - "^airflow/cli" - "^tests/cli" - ) - show_changed_files - COUNT_CLI_CHANGED_FILES=$(count_changed_files) - echo "Files count: ${COUNT_CLI_CHANGED_FILES}" - readonly COUNT_CLI_CHANGED_FILES - start_end::group_end -} - -function get_count_providers_files() { - start_end::group_start "Count providers files" - local pattern_array=( - "^airflow/providers/" - "^tests/providers/" - ) - show_changed_files - COUNT_PROVIDERS_CHANGED_FILES=$(count_changed_files) - echo "Files count: ${COUNT_PROVIDERS_CHANGED_FILES}" - readonly COUNT_PROVIDERS_CHANGED_FILES - start_end::group_end -} - -function get_count_www_files() { - start_end::group_start "Count www files" - local pattern_array=( - "^airflow/www" - "^tests/www" - ) - show_changed_files - COUNT_WWW_CHANGED_FILES=$(count_changed_files) - echo "Files count: ${COUNT_WWW_CHANGED_FILES}" - readonly COUNT_WWW_CHANGED_FILES - start_end::group_end -} - -function get_count_ui_files() { - start_end::group_start "Count ui files" - local pattern_array=( - "^airflow/ui/" - ) - show_changed_files - COUNT_UI_CHANGED_FILES=$(count_changed_files) - echo "Files count: ${COUNT_UI_CHANGED_FILES}" - readonly COUNT_UI_CHANGED_FILES - start_end::group_end -} - -function get_count_kubernetes_files() { - start_end::group_start "Count kubernetes files" - local pattern_array=( - "^chart" - "^kubernetes_tests" - "^airflow/providers/cncf/kubernetes/" - "^tests/providers/cncf/kubernetes/" - ) - show_changed_files - COUNT_KUBERNETES_CHANGED_FILES=$(count_changed_files) - echo "Files count: ${COUNT_KUBERNETES_CHANGED_FILES}" - readonly COUNT_KUBERNETES_CHANGED_FILES - start_end::group_end -} - -function calculate_test_types_to_run() { - start_end::group_start "Count core/other files" - COUNT_CORE_OTHER_CHANGED_FILES=$((COUNT_ALL_CHANGED_FILES - COUNT_WWW_CHANGED_FILES - COUNT_UI_CHANGED_FILES - COUNT_PROVIDERS_CHANGED_FILES - COUNT_CLI_CHANGED_FILES - COUNT_API_CHANGED_FILES - COUNT_KUBERNETES_CHANGED_FILES)) - - readonly COUNT_CORE_OTHER_CHANGED_FILES - echo - echo "Files count: ${COUNT_CORE_OTHER_CHANGED_FILES}" - echo - if [[ ${COUNT_CORE_OTHER_CHANGED_FILES} -gt 0 ]]; then - # Running all tests because some core or other files changed - echo - echo "Looks like ${COUNT_CORE_OTHER_CHANGED_FILES} files changed in the core/other area and" - echo "We have to run all python tests. 
This will take longer than usual" - echo - set_outputs_run_all_python_tests - else - if [[ ${COUNT_KUBERNETES_CHANGED_FILES} != "0" ]]; then - kubernetes_tests_needed="true" - fi - tests_needed="true" - SELECTED_TESTS="" - if [[ ${COUNT_API_CHANGED_FILES} != "0" ]]; then - echo - echo "Adding API to selected files as ${COUNT_API_CHANGED_FILES} API files changed" - echo - SELECTED_TESTS="${SELECTED_TESTS} API" - fi - if [[ ${COUNT_CLI_CHANGED_FILES} != "0" ]]; then - echo - echo "Adding CLI and Kubernetes (they depend on CLI) to selected files as ${COUNT_CLI_CHANGED_FILES} CLI files changed" - echo - SELECTED_TESTS="${SELECTED_TESTS} CLI" - kubernetes_tests_needed="true" - fi - - if [[ ${DEFAULT_BRANCH} == "main" ]]; then - if [[ ${COUNT_PROVIDERS_CHANGED_FILES} != "0" ]]; then - echo - echo "Adding Providers to selected files as ${COUNT_PROVIDERS_CHANGED_FILES} Provider files changed" - echo - SELECTED_TESTS="${SELECTED_TESTS} Providers" - fi - else - echo - echo "Providers tests are not added because they are only run in case of main branch." - echo - fi - if [[ ${COUNT_WWW_CHANGED_FILES} != "0" ]]; then - echo - echo "Adding WWW to selected files as ${COUNT_WWW_CHANGED_FILES} WWW files changed" - echo - SELECTED_TESTS="${SELECTED_TESTS} WWW" - fi - initialization::ga_output test-types "Always Integration ${SELECTED_TESTS}" - fi - start_end::group_end -} - - - -upgrade_to_newer_dependencies="false" - -if (($# < 1)); then - echo - echo "No Commit SHA - running all tests (likely direct merge, or scheduled run)!" - echo - INCOMING_COMMIT_SHA="" - readonly INCOMING_COMMIT_SHA - # override FULL_TESTS_NEEDED_LABEL in main/scheduled run - FULL_TESTS_NEEDED_LABEL="true" - readonly FULL_TESTS_NEEDED_LABEL - output_all_basic_variables - check_upgrade_to_newer_dependencies_needed - set_outputs_run_everything_and_exit -else - INCOMING_COMMIT_SHA="${1}" - readonly INCOMING_COMMIT_SHA - echo - echo "Commit SHA passed: ${INCOMING_COMMIT_SHA}!" 
-    echo
-    readonly FULL_TESTS_NEEDED_LABEL
-fi
-
-check_upgrade_to_newer_dependencies_needed
-
-output_all_basic_variables
-
-image_build_needed="false"
-docs_build_needed="false"
-tests_needed="false"
-kubernetes_tests_needed="false"
-
-get_changed_files
-check_if_setup_files_changed
-run_all_tests_if_environment_files_changed
-check_if_any_py_files_changed
-check_if_docs_should_be_generated
-check_if_helm_tests_should_be_run
-check_if_api_tests_should_be_run
-check_if_api_codegen_should_be_run
-check_if_javascript_security_scans_should_be_run
-check_if_python_security_scans_should_be_run
-check_if_ui_tests_should_be_run
-check_if_www_tests_should_be_run
-check_if_tests_are_needed_at_all
-get_count_all_files
-get_count_api_files
-get_count_cli_files
-get_count_providers_files
-get_count_www_files
-get_count_ui_files
-get_count_kubernetes_files
-calculate_test_types_to_run
-
-set_image_build "${image_build_needed}"
-if [[ ${image_build_needed} == "true" ]]; then
-    set_basic_checks_only "false"
-else
-    set_basic_checks_only "true"
-fi
-set_docs_build "${docs_build_needed}"
-run_tests "${tests_needed}"
-run_kubernetes_tests "${kubernetes_tests_needed}"
-set_upgrade_to_newer_dependencies "${upgrade_to_newer_dependencies}"
diff --git a/scripts/ci/testing/ci_run_single_airflow_test_in_docker.sh b/scripts/ci/testing/ci_run_single_airflow_test_in_docker.sh
index d120b50ec75c4..bc5bc041ed17d 100755
--- a/scripts/ci/testing/ci_run_single_airflow_test_in_docker.sh
+++ b/scripts/ci/testing/ci_run_single_airflow_test_in_docker.sh
@@ -89,7 +89,7 @@ function run_airflow_testing_in_docker() {
     echo
     docker-compose -f "${SCRIPTS_CI_DIR}/docker-compose/base.yml" \
       "${INTEGRATIONS[@]}" \
-      --project-name "airflow-${TEST_TYPE}-${BACKEND}" \
+      --project-name "airflow-${TEST_TYPE,,}-${BACKEND}" \
      down --remove-orphans \
      --volumes --timeout 10
     docker-compose --log-level INFO \
@@ -97,7 +97,7 @@ function run_airflow_testing_in_docker() {
      "${BACKEND_DOCKER_COMPOSE[@]}" \
      "${INTEGRATIONS[@]}" \
      "${DOCKER_COMPOSE_LOCAL[@]}" \
-      --project-name "airflow-${TEST_TYPE}-${BACKEND}" \
+      --project-name "airflow-${TEST_TYPE,,}-${BACKEND}" \
      run airflow "${@}"
     exit_code=$?
     docker ps
@@ -112,7 +112,7 @@ function run_airflow_testing_in_docker() {

     docker-compose --log-level INFO -f "${SCRIPTS_CI_DIR}/docker-compose/base.yml" \
       "${INTEGRATIONS[@]}" \
-      --project-name "airflow-${TEST_TYPE}-${BACKEND}" \
+      --project-name "airflow-${TEST_TYPE,,}-${BACKEND}" \
      down --remove-orphans \
      --volumes --timeout 10
   set -u
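
The only functional change to `ci_run_single_airflow_test_in_docker.sh` is the switch from `${TEST_TYPE}` to `${TEST_TYPE,,}` in the docker-compose project names. `${TEST_TYPE,,}` is Bash parameter expansion that lower-cases the value, so with `TEST_TYPE=Providers` and `BACKEND=postgres` the project name expands to `airflow-providers-postgres` rather than `airflow-Providers-postgres`. Compose project names must be lower case, so this keeps the generated name valid however the test type coming out of selective checks is capitalised.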
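
For readers comparing the two approaches: the decision the deleted `calculate_test_types_to_run` function made in Bash is what the new rule-based Python code in `dev/breeze/src/airflow_breeze/utils/selective_checks.py` now expresses. The snippet below is only an illustrative sketch of that rule style, not the Breeze implementation itself; the `ChangedFileCounts` and `derive_test_types` names and the `"all"` sentinel are invented for the example:

# Illustrative sketch only -- not the code added in
# dev/breeze/src/airflow_breeze/utils/selective_checks.py.
# It restates the rules of the deleted calculate_test_types_to_run()
# in plain Python to show the shape of the rule-based approach.
from dataclasses import dataclass
from typing import List


@dataclass
class ChangedFileCounts:
    # Number of changed files matched per area, as the old script counted them.
    all_sources: int
    api: int
    cli: int
    providers: int
    www: int
    ui: int
    kubernetes: int


def derive_test_types(counts: ChangedFileCounts, default_branch: str = "main") -> List[str]:
    # Files that belong to none of the known areas count as "core/other".
    core_other = counts.all_sources - (
        counts.api + counts.cli + counts.providers + counts.www + counts.ui + counts.kubernetes
    )
    if core_other > 0:
        # Any core/other change forces the full Python test matrix.
        return ["all"]
    selected = ["Always", "Integration"]
    if counts.api:
        selected.append("API")
    if counts.cli:
        # CLI changes also pull in Kubernetes tests (they depend on the CLI).
        selected.append("CLI")
    if counts.providers and default_branch == "main":
        # Provider tests only run when targeting the main branch.
        selected.append("Providers")
    if counts.www:
        selected.append("WWW")
    return selected


if __name__ == "__main__":
    counts = ChangedFileCounts(
        all_sources=3, api=1, cli=2, providers=0, www=0, ui=0, kubernetes=0
    )
    print(derive_test_types(counts))  # ['Always', 'Integration', 'API', 'CLI']

As the commit message notes, the same logic can also be exercised locally with `breeze selective-check`; the workflows above feed it the commit to inspect through the `COMMIT_REF` environment variable and the PR labels through `PR_LABELS`.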