-
-
-Usage: breeze static-checks [OPTIONS] [PRECOMMIT_ARGS]...
-
-Run static checks.
-
-╭─ Pre-commit flags ───────────────────────────────────────────────────────────────────────────────────────────────────╮
-│--type  -t  Type(s) of the static checks to run (multiple can be added). │
-│(all | black | blacken-docs | check-airflow-2-1-compatibility | │
-│check-airflow-config-yaml-consistent | check-airflow-providers-have-extras | │
-│check-apache-license-rat | check-base-operator-partial-arguments | │
-│check-base-operator-usage | check-boring-cyborg-configuration | │
-│check-breeze-top-dependencies-limited | check-builtin-literals | │
-│check-changelog-has-no-duplicates | check-daysago-import-from-utils | │
-│check-docstring-param-types | check-example-dags-urls | check-executables-have-shebangs │
-│| check-extra-packages-references | check-extras-order | check-for-inclusive-language | │
-│check-hooks-apply | check-incorrect-use-of-LoggingMixin | │
-│check-integrations-are-consistent | check-merge-conflict | check-newsfragments-are-valid│
-│| check-no-providers-in-core-examples | check-no-relative-imports | │
-│check-persist-credentials-disabled-in-github-workflows | │
-│check-pre-commit-information-consistent | check-provide-create-sessions-imports | │
-│check-provider-yaml-valid | check-providers-init-file-missing | │
-│check-providers-subpackages-init-file-exist | check-pydevd-left-in-code | │
-│check-revision-heads-map | check-safe-filter-usage-in-html | check-setup-order | │
-│check-start-date-not-used-in-defaults | check-system-tests-present | │
-│check-system-tests-tocs | check-xml | codespell | create-missing-init-py-files-tests | │
-│debug-statements | detect-private-key | doctoc | end-of-file-fixer | fix-encoding-pragma│
-│| flynt | forbid-tabs | identity | insert-license | isort | lint-chart-schema | lint-css│
-│| lint-dockerfile | lint-helm-chart | lint-javascript | lint-json-schema | lint-markdown│
-│| lint-openapi | mixed-line-ending | pretty-format-json | pydocstyle | │
-│python-no-log-warn | pyupgrade | rst-backticks | run-flake8 | run-mypy | run-shellcheck │
-│| static-check-autoflake | trailing-whitespace | update-breeze-cmd-output | │
-│update-breeze-readme-config-hash | update-extras | update-in-the-wild-to-be-sorted | │
-│update-inlined-dockerfile-scripts | update-local-yml-file | update-migration-references │
-│| update-providers-dependencies | update-setup-cfg-file | │
-│update-spelling-wordlist-to-be-sorted | update-supported-versions | │
-│update-vendored-in-k8s-json-schema | update-version | yamllint | yesqa) │
-│--file  -f  List of files to run the checks on. (PATH)│
-│--all-files  -a  Run checks on all files.│
-│--show-diff-on-failure  -s  Show diff for files modified by the checks.│
-│--last-commit  -c  Run checks for all files in last commit. Mutually exclusive with --commit-ref.│
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│--commit-ref  -r  Run checks for this commit reference only (can be any git commit-ish reference). Mutually │
-│exclusive with --last-commit. │
-│(TEXT) │
-│--verbose  -v  Print verbose information about performed steps.│
-│--dry-run  -D  If dry-run is set, commands are only printed, not executed.│
-│--github-repository  -g  GitHub repository used to pull, push run images. (TEXT) [default: apache/airflow]│
-│--help  -h  Show this message and exit.│
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+
+
+Usage: breeze static-checks [OPTIONS] [PRECOMMIT_ARGS]...
+
+Run static checks.
+
+╭─ Pre-commit flags ───────────────────────────────────────────────────────────────────────────────────────────────────╮
+│--type  -t  Type(s) of the static checks to run (multiple can be added). │
+│(all | black | blacken-docs | check-airflow-2-1-compatibility | │
+│check-airflow-config-yaml-consistent | check-apache-license-rat | │
+│check-base-operator-partial-arguments | check-base-operator-usage | │
+│check-boring-cyborg-configuration | check-breeze-top-dependencies-limited | │
+│check-builtin-literals | check-changelog-has-no-duplicates | │
+│check-daysago-import-from-utils | check-docstring-param-types | check-example-dags-urls │
+│| check-executables-have-shebangs | check-extra-packages-references | check-extras-order│
+│| check-for-inclusive-language | check-hooks-apply | check-incorrect-use-of-LoggingMixin│
+│| check-integrations-are-consistent | check-merge-conflict | │
+│check-newsfragments-are-valid | check-no-providers-in-core-examples | │
+│check-no-relative-imports | check-persist-credentials-disabled-in-github-workflows | │
+│check-pre-commit-information-consistent | check-provide-create-sessions-imports | │
+│check-provider-yaml-valid | check-providers-init-file-missing | │
+│check-providers-subpackages-init-file-exist | check-pydevd-left-in-code | │
+│check-revision-heads-map | check-safe-filter-usage-in-html | check-setup-order | │
+│check-start-date-not-used-in-defaults | check-system-tests-present | │
+│check-system-tests-tocs | check-xml | codespell | create-missing-init-py-files-tests | │
+│debug-statements | detect-private-key | doctoc | end-of-file-fixer | fix-encoding-pragma│
+│| flynt | forbid-tabs | identity | insert-license | isort | lint-chart-schema | lint-css│
+│| lint-dockerfile | lint-helm-chart | lint-javascript | lint-json-schema | lint-markdown│
+│| lint-openapi | mixed-line-ending | pretty-format-json | pydocstyle | │
+│python-no-log-warn | pyupgrade | rst-backticks | run-flake8 | run-mypy | run-shellcheck │
+│| static-check-autoflake | trailing-whitespace | update-breeze-cmd-output | │
+│update-breeze-readme-config-hash | update-extras | update-in-the-wild-to-be-sorted | │
+│update-inlined-dockerfile-scripts | update-local-yml-file | update-migration-references │
+│| update-providers-dependencies | update-setup-cfg-file | │
+│update-spelling-wordlist-to-be-sorted | update-supported-versions | │
+│update-vendored-in-k8s-json-schema | update-version | yamllint | yesqa) │
+│--file  -f  List of files to run the checks on. (PATH)│
+│--all-files  -a  Run checks on all files.│
+│--show-diff-on-failure  -s  Show diff for files modified by the checks.│
+│--last-commit  -c  Run checks for all files in last commit. Mutually exclusive with --commit-ref.│
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│--commit-ref  -r  Run checks for this commit reference only (can be any git commit-ish reference). Mutually │
+│exclusive with --last-commit. │
+│(TEXT) │
+│--verbose  -v  Print verbose information about performed steps.│
+│--dry-run  -D  If dry-run is set, commands are only printed, not executed.│
+│--github-repository  -g  GitHub repository used to pull, push run images. (TEXT) [default: apache/airflow]│
+│--help  -h  Show this message and exit.│
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
diff --git a/scripts/ci/docker-compose/remove-sources.yml b/scripts/ci/docker-compose/remove-sources.yml
index 6bdf8a9e1c82f..65b5bce495d1a 100644
--- a/scripts/ci/docker-compose/remove-sources.yml
+++ b/scripts/ci/docker-compose/remove-sources.yml
@@ -18,9 +18,6 @@
version: "3.7"
services:
airflow:
- # Forwards local credentials to docker image
- # Useful for gcloud/aws/kubernetes etc. authorisation to be passed
- # To inside docker. Use with care - your credentials will be available to
- # Everything you install in Docker
+ # Removes airflow sources from container
volumes:
- ./empty:/opt/airflow/airflow:cached
diff --git a/scripts/ci/pre_commit/common_precommit_utils.py b/scripts/ci/pre_commit/common_precommit_utils.py
index 871aee3672ffa..4b2813b92c45c 100644
--- a/scripts/ci/pre_commit/common_precommit_utils.py
+++ b/scripts/ci/pre_commit/common_precommit_utils.py
@@ -14,10 +14,11 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
from pathlib import Path
from typing import List
+AIRFLOW_SOURCES_ROOT = Path(__file__).parents[3].resolve()
+
def insert_documentation(file_path: Path, content: List[str], header: str, footer: str):
text = file_path.read_text().splitlines(keepends=True)
@@ -32,4 +33,5 @@ def insert_documentation(file_path: Path, content: List[str], header: str, foote
replacing = False
if not replacing:
result.append(line)
- file_path.write_text("".join(result))
+ src = "".join(result)
+ file_path.write_text(src)
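The refactored helper above rewrites the region of a file that sits between a header and a footer marker, and the new AIRFLOW_SOURCES_ROOT constant lets the pre-commit scripts resolve paths relative to the repository root. A minimal usage sketch, assuming an illustrative target file and marker strings (neither is taken from this patch):

    from common_precommit_utils import AIRFLOW_SOURCES_ROOT, insert_documentation

    # Replace everything between the two marker lines with freshly generated content.
    target = AIRFLOW_SOURCES_ROOT / "CONTRIBUTING.rst"      # hypothetical target file
    generated_lines = ["  extra-one\n", "  extra-two\n"]    # hypothetical content
    insert_documentation(
        target,
        generated_lines,
        header="  .. START EXTRAS HERE",  # assumed marker, for illustration only
        footer="  .. END EXTRAS HERE",    # assumed marker, for illustration only
    )

pre_commit_insert_extras.py, later in this patch, uses the same helper with its own INSTALL_HEADER/INSTALL_FOOTER and RST_HEADER/RST_FOOTER markers.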
diff --git a/scripts/ci/pre_commit/pre_commit_build_providers_dependencies.py b/scripts/ci/pre_commit/pre_commit_build_providers_dependencies.py
new file mode 100755
index 0000000000000..af2471ad62d4c
--- /dev/null
+++ b/scripts/ci/pre_commit/pre_commit_build_providers_dependencies.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import json
+import os
+import sys
+from ast import Import, ImportFrom, NodeVisitor, parse
+from collections import defaultdict
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import yaml
+from rich.console import Console
+
+console = Console(color_system="standard", width=200)
+
+AIRFLOW_PROVIDERS_IMPORT_PREFIX = "airflow.providers."
+
+AIRFLOW_SOURCES_ROOT = Path(__file__).parents[3].resolve()
+
+AIRFLOW_PROVIDERS_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "providers"
+AIRFLOW_TESTS_PROVIDERS_DIR = AIRFLOW_SOURCES_ROOT / "tests" / "providers"
+AIRFLOW_SYSTEM_TESTS_PROVIDERS_DIR = AIRFLOW_SOURCES_ROOT / "system" / "tests" / "providers"
+
+DEPENDENCIES_JSON_FILE_PATH = AIRFLOW_SOURCES_ROOT / "generated" / "provider_dependencies.json"
+
+sys.path.insert(0, str(AIRFLOW_SOURCES_ROOT)) # make sure setup is imported from Airflow
+
+warnings: List[str] = []
+errors: List[str] = []
+
+CROSS_PROVIDERS_DEPS = "cross-providers-deps"
+DEPS = "deps"
+
+ALL_DEPENDENCIES: Dict[str, Dict[str, List[str]]] = defaultdict(lambda: defaultdict(list))
+
+ALL_PROVIDERS: Dict[str, Dict[str, Any]] = defaultdict(lambda: defaultdict())
+ALL_PROVIDER_FILES: List[Path] = []
+
+# Allow AST to parse the files.
+sys.path.append(str(AIRFLOW_SOURCES_ROOT))
+
+
+class ImportFinder(NodeVisitor):
+ """
+ AST visitor that collects all imported names in its imports
+ """
+
+ def __init__(self) -> None:
+ self.imports: List[str] = []
+        self.handled_import_exception: List[str] = []
+ self.tried_imports: List[str] = []
+
+ def process_import(self, import_name: str) -> None:
+ self.imports.append(import_name)
+
+ def get_import_name_from_import_from(self, node: ImportFrom) -> List[str]:
+ import_names: List[str] = []
+ for alias in node.names:
+ name = alias.name
+ fullname = f'{node.module}.{name}' if node.module else name
+ import_names.append(fullname)
+ return import_names
+
+ def visit_Import(self, node: Import):
+ for alias in node.names:
+ self.process_import(alias.name)
+
+ def visit_ImportFrom(self, node: ImportFrom):
+ if node.module == '__future__':
+ return
+ for fullname in self.get_import_name_from_import_from(node):
+ self.process_import(fullname)
+
+
+def find_all_providers_and_provider_files():
+ for (root, _, filenames) in os.walk(AIRFLOW_PROVIDERS_DIR):
+ for filename in filenames:
+ if filename == 'provider.yaml':
+ provider_file = Path(root, filename)
+ provider_name = str(provider_file.parent.relative_to(AIRFLOW_PROVIDERS_DIR)).replace(
+ os.sep, "."
+ )
+ ALL_PROVIDERS[provider_name] = yaml.safe_load(provider_file.read_text())
+ path = Path(root, filename)
+ if path.is_file() and path.name.endswith(".py"):
+ ALL_PROVIDER_FILES.append(Path(root, filename))
+
+
+def get_provider_id_from_relative_import_or_file(relative_path_or_file: str) -> Optional[str]:
+ provider_candidate = relative_path_or_file.replace(os.sep, ".").split(".")
+ while len(provider_candidate) > 0:
+ candidate_provider_id = ".".join(provider_candidate)
+ if candidate_provider_id in ALL_PROVIDERS:
+ return candidate_provider_id
+ provider_candidate = provider_candidate[:-1]
+ return None
+
+
+def get_provider_id_from_import(import_name: str, file_path: Path) -> Optional[str]:
+ if not import_name.startswith(AIRFLOW_PROVIDERS_IMPORT_PREFIX):
+ # skip silently - it's OK to get non-provider imports
+ return None
+ relative_provider_import = import_name[len(AIRFLOW_PROVIDERS_IMPORT_PREFIX) :]
+ provider_id = get_provider_id_from_relative_import_or_file(relative_provider_import)
+ if provider_id is None:
+ warnings.append(f"We could not determine provider id from import {import_name} in {file_path}")
+ return provider_id
+
+
+def get_imports_from_file(file_path: Path) -> List[str]:
+ root = parse(file_path.read_text(), file_path.name)
+ visitor = ImportFinder()
+ visitor.visit(root)
+ return visitor.imports
+
+
+def get_provider_id_from_file_name(file_path: Path) -> Optional[str]:
+ # is_relative_to is only available in Python 3.9 - we should simplify this check when we are Python 3.9+
+ try:
+ relative_path = file_path.relative_to(AIRFLOW_PROVIDERS_DIR)
+ except ValueError:
+ try:
+ relative_path = file_path.relative_to(AIRFLOW_SYSTEM_TESTS_PROVIDERS_DIR)
+ except ValueError:
+ try:
+ relative_path = file_path.relative_to(AIRFLOW_TESTS_PROVIDERS_DIR)
+ except ValueError:
+ errors.append(f"Wrong file not in the providers package = {file_path}")
+ return None
+ provider_id = get_provider_id_from_relative_import_or_file(str(relative_path))
+ if provider_id is None and file_path.name not in ["__init__.py", "get_provider_info.py"]:
+ warnings.append(f"We had a problem to classify the file {file_path} to a provider")
+ return provider_id
+
+
+def check_if_different_provider_used(file_path: Path) -> None:
+ file_provider = get_provider_id_from_file_name(file_path)
+ if not file_provider:
+ return
+ imports = get_imports_from_file(file_path)
+ for import_name in imports:
+ imported_provider = get_provider_id_from_import(import_name, file_path)
+ if imported_provider is not None and imported_provider not in ALL_PROVIDERS:
+ warnings.append(f"The provider {imported_provider} from {file_path} cannot be found.")
+ elif imported_provider and file_provider != imported_provider:
+ ALL_DEPENDENCIES[file_provider][CROSS_PROVIDERS_DEPS].append(imported_provider)
+
+
+if __name__ == '__main__':
+ find_all_providers_and_provider_files()
+ num_files = len(ALL_PROVIDER_FILES)
+ num_providers = len(ALL_PROVIDERS)
+ console.print(f"Found {len(ALL_PROVIDERS)} providers with {len(ALL_PROVIDER_FILES)} Python files.")
+
+ for file in ALL_PROVIDER_FILES:
+ check_if_different_provider_used(file)
+
+ for provider, provider_yaml_content in ALL_PROVIDERS.items():
+ ALL_DEPENDENCIES[provider][DEPS].extend(provider_yaml_content['dependencies'])
+
+ if warnings:
+ console.print("[yellow]Warnings!\n")
+ for warning in warnings:
+ console.print(f"[yellow] {warning}")
+ console.print(f"[bright_blue]Total: {len(warnings)} warnings.")
+ if errors:
+ console.print("[red]Errors!\n")
+ for error in errors:
+ console.print(f"[red] {error}")
+ console.print(f"[bright_blue]Total: {len(errors)} errors.")
+ unique_sorted_dependencies: Dict[str, Dict[str, List[str]]] = defaultdict(dict)
+ for key in sorted(ALL_DEPENDENCIES.keys()):
+ unique_sorted_dependencies[key][DEPS] = sorted(ALL_DEPENDENCIES[key][DEPS])
+ unique_sorted_dependencies[key][CROSS_PROVIDERS_DEPS] = sorted(
+ set(ALL_DEPENDENCIES[key][CROSS_PROVIDERS_DEPS])
+ )
+ if errors:
+ console.print()
+ console.print("[red]Errors found during verification. Exiting!")
+ console.print()
+ sys.exit(1)
+ DEPENDENCIES_JSON_FILE_PATH.write_text(json.dumps(unique_sorted_dependencies, indent=2) + "\n")
+ console.print()
+ console.print("[green]Verification complete! Success!\n")
+ console.print(f"Written {DEPENDENCIES_JSON_FILE_PATH}")
+ console.print()
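The new check writes generated/provider_dependencies.json: one entry per provider id, with its PyPI requirements under "deps" and the providers it imports under "cross-providers-deps". setup.py, further down in this patch, consumes the same file through fill_provider_dependencies(). A minimal sketch of reading the generated file, assuming the script runs from the repository root (the provider id and values in the comments are illustrative):

    import json
    from pathlib import Path

    AIRFLOW_SOURCES_ROOT = Path(__file__).parent.resolve()  # adjust to the repository root

    provider_dependencies = json.loads(
        (AIRFLOW_SOURCES_ROOT / "generated" / "provider_dependencies.json").read_text()
    )
    # Each provider id maps to two sorted lists, e.g. (illustrative values):
    #   provider_dependencies["amazon"]["deps"]                 -> ["boto3>=1.15.0", ...]
    #   provider_dependencies["amazon"]["cross-providers-deps"] -> ["apache.hive", "google"]
    for provider, entry in provider_dependencies.items():
        print(provider, len(entry["deps"]), entry["cross-providers-deps"])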
diff --git a/scripts/ci/pre_commit/pre_commit_build_providers_dependencies.sh b/scripts/ci/pre_commit/pre_commit_build_providers_dependencies.sh
deleted file mode 100755
index 3aa606b6c5a4b..0000000000000
--- a/scripts/ci/pre_commit/pre_commit_build_providers_dependencies.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-set -euo pipefail
-
-PRE_COMMIT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-readonly PRE_COMMIT_DIR
-
-AIRFLOW_SOURCES=$(cd "${PRE_COMMIT_DIR}/../../../" && pwd);
-readonly AIRFLOW_SOURCES
-cd "${AIRFLOW_SOURCES}" || exit 1
-
-export PRINT_INFO_FROM_SCRIPTS="false"
-export SKIP_CHECK_REMOTE_IMAGE="true"
-
-PYTHONPATH="$(pwd)"
-export PYTHONPATH
-
-find airflow/providers -name '*.py' -print0 | \
- xargs -0 python3 tests/build_provider_packages_dependencies.py \
- --provider-dependencies-file "airflow/providers/dependencies.json" \
- --documentation-file CONTRIBUTING.rst
diff --git a/scripts/ci/pre_commit/pre_commit_check_extras_have_providers.py b/scripts/ci/pre_commit/pre_commit_check_extras_have_providers.py
deleted file mode 100755
index 91943d52e8107..0000000000000
--- a/scripts/ci/pre_commit/pre_commit_check_extras_have_providers.py
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env python
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Check if the extras have providers defined.
-import os
-import sys
-from os.path import dirname
-from pathlib import Path
-from typing import List
-
-from rich import print
-
-AIRFLOW_SOURCES_DIR = os.path.abspath(os.path.join(dirname(__file__), os.pardir, os.pardir, os.pardir))
-
-sys.path.insert(0, AIRFLOW_SOURCES_DIR)
-# flake8: noqa: F401
-
-from setup import ALL_PROVIDERS # noqa
-
-sys.path.append(AIRFLOW_SOURCES_DIR)
-
-errors: List[str] = []
-
-PROVIDERS_DIR = os.path.join(AIRFLOW_SOURCES_DIR, "airflow", "providers")
-
-
-def get_provider_directory(provider: str) -> str:
- """Returns provider directory derived from name"""
- return os.path.join(PROVIDERS_DIR, *provider.split('.'))
-
-
-def check_all_providers_listed_have_directory() -> None:
- for provider in ALL_PROVIDERS:
- provider_directory = get_provider_directory(provider)
- if not os.path.isdir(provider_directory):
- errors.append(
- f"The provider {provider} is defined in setup.py: [bold]PROVIDERS_REQUIREMENTS[/] but it "
- + f"has missing {provider_directory} directory: [red]NOK[/]"
- )
- continue
- if not os.path.exists(os.path.join(provider_directory, "__init__.py")):
- errors.append(
- f"The {provider} does not have the __init__.py "
- + f"file in the {provider_directory} directory [red]NOK[/]"
- )
- if not os.path.exists(os.path.join(provider_directory, "provider.yaml")):
- errors.append(
- f"The provider {provider} does not have the provider.yaml "
- + f"in the {provider_directory} directory: [red]NOK[/]"
- )
-
-
-def check_all_providers_are_listed_in_setup_py() -> None:
- for path in Path(PROVIDERS_DIR).rglob('provider.yaml'):
- provider_name = str(path.parent.relative_to(PROVIDERS_DIR)).replace(os.sep, ".")
- if provider_name not in ALL_PROVIDERS:
- errors.append(
- f"The provider {provider_name} is missing in setup.py "
- "[bold]PROVIDERS_REQUIREMENTS[/]: [red]NOK[/]"
- )
-
-
-if __name__ == '__main__':
- check_all_providers_listed_have_directory()
- check_all_providers_are_listed_in_setup_py()
- if errors:
- for message in errors:
- print(message, file=sys.stderr)
- sys.exit(1)
- else:
- print("All providers are correctly defined in setup.py [green]OK[/]")
diff --git a/scripts/ci/pre_commit/pre_commit_check_order_setup.py b/scripts/ci/pre_commit/pre_commit_check_order_setup.py
index 8e4ac563f60c5..b0b5f1cd950f9 100755
--- a/scripts/ci/pre_commit/pre_commit_check_order_setup.py
+++ b/scripts/ci/pre_commit/pre_commit_check_order_setup.py
@@ -142,9 +142,8 @@ def check_install_and_setup_requires() -> None:
file_contents = setup_file.read()
check_main_dependent_group(file_contents)
check_alias_dependent_group(file_contents)
- check_variable_order("PROVIDERS_REQUIREMENTS")
- check_variable_order("CORE_EXTRAS_REQUIREMENTS")
- check_variable_order("ADDITIONAL_EXTRAS_REQUIREMENTS")
+ check_variable_order("CORE_EXTRAS_DEPENDENCIES")
+ check_variable_order("ADDITIONAL_EXTRAS_DEPENDENCIES")
check_variable_order("EXTRAS_DEPRECATED_ALIASES")
check_variable_order("PREINSTALLED_PROVIDERS")
check_install_and_setup_requires()
diff --git a/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py b/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py
index 28d2bbc7c9e5b..5417b624fd964 100755
--- a/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py
+++ b/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py
@@ -40,8 +40,7 @@
from setup import ( # noqa # isort:skip
add_all_provider_packages,
EXTRAS_DEPRECATED_ALIASES,
- EXTRAS_REQUIREMENTS,
- PROVIDERS_REQUIREMENTS,
+ EXTRAS_DEPENDENCIES,
PREINSTALLED_PROVIDERS,
)
@@ -54,7 +53,7 @@ def get_file_content(*path_elements: str) -> str:
def get_extras_from_setup() -> Set[str]:
"""Returns a set of regular (non-deprecated) extras from setup."""
- return set(EXTRAS_REQUIREMENTS.keys()) - set(EXTRAS_DEPRECATED_ALIASES.keys())
+ return set(EXTRAS_DEPENDENCIES.keys()) - set(EXTRAS_DEPRECATED_ALIASES.keys())
def get_extras_from_docs() -> Set[str]:
@@ -125,8 +124,7 @@ def check_extras(console: Console) -> bool:
f"""\
[red bold]ERROR!![/red bold]
-The "[bold]CORE_EXTRAS_REQUIREMENTS[/bold]", "[bold]ADDITIONAL_PROVIDERS_REQUIREMENTS[/bold]", and
- "[bold]PROVIDERS_REQUIREMENTS[/bold]"
+The "[bold]CORE_EXTRAS_DEPENDENCIES[/bold]"
sections in the setup file: [bold yellow]{SETUP_PY_FILE}[/bold yellow]
should be synchronized with the "Extra Packages Reference"
in the documentation file: [bold yellow]{DOCS_FILE}[/bold yellow].
diff --git a/scripts/ci/pre_commit/pre_commit_insert_extras.py b/scripts/ci/pre_commit/pre_commit_insert_extras.py
index dbcc403900426..c0c19f4ab0ea8 100755
--- a/scripts/ci/pre_commit/pre_commit_insert_extras.py
+++ b/scripts/ci/pre_commit/pre_commit_insert_extras.py
@@ -26,7 +26,7 @@
# flake8: noqa: F401
from common_precommit_utils import insert_documentation # isort: skip
-from setup import EXTRAS_REQUIREMENTS # isort:skip
+from setup import EXTRAS_DEPENDENCIES # isort:skip
sys.path.append(str(AIRFLOW_SOURCES_DIR))
@@ -52,8 +52,8 @@
global_constants_file_path = (
AIRFLOW_SOURCES_DIR / "dev" / "breeze" / "src" / "airflow_breeze" / "global_constants.py"
)
- extras_list = wrap(", ".join(EXTRAS_REQUIREMENTS.keys()), 100)
+ extras_list = wrap(", ".join(EXTRAS_DEPENDENCIES.keys()), 100)
extras_list = [line + "\n" for line in extras_list]
- extras_code = [f" {extra}\n" for extra in EXTRAS_REQUIREMENTS.keys()]
+ extras_code = [f" {extra}\n" for extra in EXTRAS_DEPENDENCIES.keys()]
insert_documentation(install_file_path, extras_list, INSTALL_HEADER, INSTALL_FOOTER)
insert_documentation(contributing_file_path, extras_list, RST_HEADER, RST_FOOTER)
diff --git a/scripts/in_container/_in_container_utils.sh b/scripts/in_container/_in_container_utils.sh
index 66f2e6b083499..33a7b67d280fa 100644
--- a/scripts/in_container/_in_container_utils.sh
+++ b/scripts/in_container/_in_container_utils.sh
@@ -314,7 +314,7 @@ function install_local_airflow_with_eager_upgrade() {
function install_all_providers_from_pypi_with_eager_upgrade() {
- NO_PROVIDERS_EXTRAS=$(python -c 'import setup; print(",".join(setup.CORE_EXTRAS_REQUIREMENTS))')
+ NO_PROVIDERS_EXTRAS=$(python -c 'import setup; print(",".join(setup.CORE_EXTRAS_DEPENDENCIES))')
ALL_PROVIDERS_PACKAGES=$(python -c 'import setup; print(setup.get_all_provider_packages())')
local packages_to_install=()
local provider_package
diff --git a/scripts/in_container/run_generate_constraints.sh b/scripts/in_container/run_generate_constraints.sh
index e85c2fb9c992a..d91cbf81b6dad 100755
--- a/scripts/in_container/run_generate_constraints.sh
+++ b/scripts/in_container/run_generate_constraints.sh
@@ -25,7 +25,7 @@ mkdir -pv "${CONSTRAINTS_DIR}"
if [[ ${AIRFLOW_CONSTRAINTS_MODE} == "constraints-no-providers" ]]; then
- NO_PROVIDERS_EXTRAS=$(python -c 'import setup; print(",".join(setup.CORE_EXTRAS_REQUIREMENTS.keys()))')
+ NO_PROVIDERS_EXTRAS=$(python -c 'import setup; print(",".join(setup.CORE_EXTRAS_DEPENDENCIES.keys()))')
CURRENT_CONSTRAINT_FILE="${CONSTRAINTS_DIR}/${AIRFLOW_CONSTRAINTS_MODE}-${PYTHON_MAJOR_MINOR_VERSION}.txt"
echo
echo "UnInstall All PIP packages."
diff --git a/scripts/in_container/verify_providers.py b/scripts/in_container/verify_providers.py
index 9c5595b6f5b7a..fa4f759dec434 100755
--- a/scripts/in_container/verify_providers.py
+++ b/scripts/in_container/verify_providers.py
@@ -283,9 +283,9 @@ def get_all_providers() -> List[str]:
Returns all providers for regular packages.
:return: list of providers that are considered for provider packages
"""
- from setup import PROVIDERS_REQUIREMENTS
+ from setup import ALL_PROVIDERS
- return list(PROVIDERS_REQUIREMENTS.keys())
+ return list(ALL_PROVIDERS)
def import_all_classes(
@@ -380,16 +380,6 @@ def onerror(_):
return imported_classes, all_warnings
-def get_provider_packages() -> List[str]:
- """
- Returns all provider packages.
-
- """
- from setup import PROVIDERS_REQUIREMENTS
-
- return list(PROVIDERS_REQUIREMENTS.keys())
-
-
def is_imported_from_same_module(the_class: str, imported_name: str) -> bool:
"""
Is the class imported from another module?
diff --git a/setup.cfg b/setup.cfg
index 41f0a05d1e130..754cb00271078 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -172,6 +172,10 @@ airflow.serialization=*.json
airflow.utils=
context.pyi
+[options.data_files]
+generated=
+ generated/provider_dependencies.json
+
[options.entry_points]
console_scripts=
airflow=airflow.__main__:main
diff --git a/setup.py b/setup.py
index 996c66c736e7e..991b6b0617178 100644
--- a/setup.py
+++ b/setup.py
@@ -17,15 +17,17 @@
# under the License.
"""Setup.py for the Airflow project."""
import glob
+import json
import logging
import os
import subprocess
import sys
import unittest
from copy import deepcopy
-from os.path import dirname, relpath
+from os.path import relpath
+from pathlib import Path
from textwrap import wrap
-from typing import Dict, List
+from typing import Dict, Iterable, List, Set
from setuptools import Command, Distribution, find_namespace_packages, setup
from setuptools.command.develop import develop as develop_orig
@@ -35,6 +37,7 @@
# stdlib, which is deprecated in Python 3.10 and will be removed in 3.12.
from distutils import log # isort: skip
+
# Controls whether providers are installed from packages or directly from sources
# It is turned on by default in case of development environments such as Breeze
# And it is particularly useful when you add a new provider and there is no
@@ -46,13 +49,36 @@
version = '2.4.0.dev0'
-my_dir = dirname(__file__)
+AIRFLOW_SOURCES_ROOT = Path(__file__).parent.resolve()
+PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "airflow" / "providers"
+
+CROSS_PROVIDERS_DEPS = "cross-providers-deps"
+DEPS = "deps"
+
+
+#
+# NOTE! In Airflow 2.4+, dependencies for providers are maintained in `provider.yaml` files for each
+# provider separately. They are loaded here and if you want to modify them, you need to modify the
+# corresponding provider.yaml file.
+#
+def fill_provider_dependencies() -> Dict[str, Dict[str, List[str]]]:
+ try:
+ return json.loads((AIRFLOW_SOURCES_ROOT / "generated" / "provider_dependencies.json").read_text())
+ except Exception as e:
+ print(f"Exception while loading provider dependencies {e}")
+ # we can ignore loading dependencies when they are missing - they are only used to generate
+        # correct extras when packages are built and when we install airflow from sources
+ # (in both cases the provider_dependencies should be present).
+ return {}
+
+
+PROVIDER_DEPENDENCIES = fill_provider_dependencies()
def airflow_test_suite() -> unittest.TestSuite:
"""Test suite for Airflow tests"""
test_loader = unittest.TestLoader()
- test_suite = test_loader.discover(os.path.join(my_dir, 'tests'), pattern='test_*.py')
+ test_suite = test_loader.discover(str(AIRFLOW_SOURCES_ROOT / 'tests'), pattern='test_*.py')
return test_suite
@@ -82,7 +108,7 @@ def rm_all_files(files: List[str]) -> None:
def run(self) -> None:
"""Remove temporary files and directories."""
- os.chdir(my_dir)
+ os.chdir(str(AIRFLOW_SOURCES_ROOT))
self.rm_all_files(glob.glob('./build/*'))
self.rm_all_files(glob.glob('./**/__pycache__/*', recursive=True))
self.rm_all_files(glob.glob('./**/*.pyc', recursive=True))
@@ -129,7 +155,7 @@ def finalize_options(self) -> None:
def run(self) -> None:
"""List extras."""
- print("\n".join(wrap(", ".join(EXTRAS_REQUIREMENTS.keys()), 100)))
+ print("\n".join(wrap(", ".join(EXTRAS_DEPENDENCIES.keys()), 100)))
def git_version(version_: str) -> str:
@@ -149,8 +175,8 @@ def git_version(version_: str) -> str:
import git
try:
- repo = git.Repo(os.path.join(*[my_dir, '.git']))
- except git.NoSuchPathError:
+ repo = git.Repo(str(AIRFLOW_SOURCES_ROOT / '.git'))
+ except (git.NoSuchPathError):
logger.warning('.git directory not found: Cannot compute the git version')
return ''
except git.InvalidGitRepositoryError:
@@ -168,7 +194,7 @@ def git_version(version_: str) -> str:
return 'no_git_version'
-def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version"])) -> None:
+def write_version(filename: str = str(AIRFLOW_SOURCES_ROOT / "airflow" / "git_version")) -> None:
"""
Write the Semver version + git hash to file, e.g. ".dev0+2f635dc265e78db6708f59f68e8009abb92c1e65".
@@ -179,34 +205,13 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
file.write(text)
-pandas_requirement = 'pandas>=0.17.1'
-
+#
+# NOTE! In Airflow 2.4+, dependencies for providers are maintained in `provider.yaml` files for each
+# provider separately. Before, the provider dependencies were kept here. THEY ARE NOT HERE ANYMORE.
+#
# 'Start dependencies group' and 'Start dependencies group' are mark for ./scripts/ci/check_order_setup.py
# If you change this mark you should also change ./scripts/ci/check_order_setup.py
# Start dependencies group
-alibaba = [
- 'oss2>=2.14.0',
-]
-amazon = [
- 'boto3>=1.15.0',
- # watchtower 3 has been released end Jan and introduced breaking change across the board that might
- # change logging behaviour:
- # https://github.com/kislyuk/watchtower/blob/develop/Changes.rst#changes-for-v300-2022-01-26
- # TODO: update to watchtower >3
- 'watchtower~=2.0.1',
- 'jsonpath_ng>=1.5.3',
- 'redshift_connector>=2.0.888',
- 'sqlalchemy_redshift>=0.8.6',
- pandas_requirement,
- 'mypy-boto3-rds>=1.21.0',
- 'mypy-boto3-redshift-data>=1.21.0',
- 'mypy-boto3-appflow>=1.21.0',
-]
-apache_beam = [
- 'apache-beam>=2.39.0',
-]
-arangodb = ['python-arango>=7.3.2']
-asana = ['asana>=0.10']
async_packages = [
'eventlet>=0.9.7',
'gevent>=0.13',
@@ -215,29 +220,6 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
atlas = [
'atlasclient>=0.1.2',
]
-azure = [
- 'azure-batch>=8.0.0',
- 'azure-cosmos>=4.0.0',
- 'azure-datalake-store>=0.0.45',
- 'azure-identity>=1.3.1',
- 'azure-keyvault-secrets>=4.1.0,<5.0',
- 'azure-kusto-data>=0.0.43,<0.1',
- # Azure integration uses old librarires and the limits below reflect that
- # TODO: upgrade to newer versions of all the below libraries
- 'azure-mgmt-containerinstance>=1.5.0,<2.0',
- 'azure-mgmt-datafactory>=1.0.0,<2.0',
- 'azure-mgmt-datalake-store>=0.5.0',
- 'azure-mgmt-resource>=2.2.0',
- # limited due to https://github.com/Azure/azure-sdk-for-python/pull/18801 implementation released in 12.9
- 'azure-storage-blob>=12.7.0,<12.9.0',
- 'azure-storage-common>=2.1.0',
- 'azure-storage-file>=2.1.0',
- # Limited due to https://github.com/Azure/azure-uamqp-python/issues/191
- 'azure-servicebus>=7.6.1; platform_machine != "aarch64"',
-]
-cassandra = [
- 'cassandra-driver>=3.13.0',
-]
celery = [
# The Celery is known to introduce problems when upgraded to a MAJOR version. Airflow Core
# Uses Celery for CeleryExecutor, and we also know that Kubernetes Python client follows SemVer
@@ -245,19 +227,15 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
# This is a crucial component of Airflow, so we should limit it to the next MAJOR version and only
# deliberately bump the version when we tested it, and we know it can be bumped.
# Bumping this version should also be connected with
- # limiting minimum airflow version supported in cncf.kubernetes provider, due to the
+ # limiting minimum airflow version supported in celery provider due to the
# potential breaking changes in Airflow Core as well (celery is added as extra, so Airflow
- # core is not hard-limited via install-requirements, only by extra).
- 'celery>=5.2.3,<6',
- 'flower>=1.0.0',
+ # core is not hard-limited via install-requires, only by extra).
+ 'celery>=5.2.3,<6'
]
cgroups = [
# Cgroupspy 0.2.2 added Python 3.10 compatibility
'cgroupspy>=0.2.2',
]
-cloudant = [
- 'cloudant>=2.0',
-]
dask = [
# Dask support is limited, we need Dask team to upgrade support for dask if we were to continue
# Supporting it in the future
@@ -265,20 +243,11 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
'dask>=2.9.0',
'distributed>=2.11.1',
]
-databricks = [
- 'requests>=2.27,<3',
- 'databricks-sql-connector>=2.0.0, <3.0.0',
- 'aiohttp>=3.6.3, <4',
-]
-datadog = [
- 'datadog>=0.14.0',
-]
deprecated_api = [
'requests>=2.26.0',
]
doc = [
'click>=8.0',
- 'sphinx>=4.4.0',
# Docutils 0.17.0 converts generated into and breaks our doc formatting
# By adding a lot of whitespace separation. This limit can be lifted when we update our doc to handle
# tags for sections
@@ -292,143 +261,20 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
'sphinx-copybutton',
'sphinx-jinja>=2.0',
'sphinx-rtd-theme>=0.1.6',
+ 'sphinx>=4.4.0',
'sphinxcontrib-httpdomain>=1.7.0',
'sphinxcontrib-redoc>=1.6.0',
'sphinxcontrib-spelling>=7.3',
]
-docker = [
- 'docker>=5.0.3',
-]
-drill = ['sqlalchemy-drill>=1.1.0', 'sqlparse>=0.4.1']
-druid = [
- 'pydruid>=0.4.1',
-]
-elasticsearch = [
- 'elasticsearch>7',
- 'elasticsearch-dbapi',
- 'elasticsearch-dsl>=5.0.0',
-]
-exasol = ['pyexasol>=0.5.1', pandas_requirement]
-facebook = [
- 'facebook-business>=6.0.2',
-]
flask_appbuilder_oauth = [
'flask-appbuilder[oauth]',
]
-github = [
- 'pygithub',
-]
-google = [
- # Google has very clear rules on what dependencies should be used. All the limits below
- # follow strict guidelines of Google Libraries as quoted here:
- # While this issue is open, dependents of google-api-core, google-cloud-core. and google-auth
- # should preserve >1, <3 pins on these packages.
- # https://github.com/googleapis/google-cloud-python/issues/10566
- # Some of Google Packages are limited to <2.0.0 because 2.0.0 releases of the libraries
- # Introduced breaking changes across the board. Those libraries should be upgraded soon
- # TODO: Upgrade all Google libraries that are limited to <2.0.0
- 'PyOpenSSL',
- 'google-ads>=15.1.1',
- 'google-api-core>=2.7.0,<3.0.0',
- 'google-api-python-client>=1.6.0,<2.0.0',
- 'google-auth>=1.0.0',
- 'google-auth-httplib2>=0.0.1',
- 'google-cloud-aiplatform>=1.7.1,<2.0.0',
- 'google-cloud-automl>=2.1.0',
- 'google-cloud-bigquery-datatransfer>=3.0.0',
- 'google-cloud-bigtable>=1.0.0,<2.0.0',
- 'google-cloud-build>=3.0.0',
- 'google-cloud-container>=2.2.0,<3.0.0',
- 'google-cloud-datacatalog>=3.0.0',
- 'google-cloud-dataplex>=0.1.0',
- 'google-cloud-dataproc>=3.1.0',
- 'google-cloud-dataproc-metastore>=1.2.0,<2.0.0',
- 'google-cloud-dlp>=0.11.0,<2.0.0',
- 'google-cloud-kms>=2.0.0',
- 'google-cloud-language>=1.1.1,<2.0.0',
- 'google-cloud-logging>=2.1.1',
- 'google-cloud-memcache>=0.2.0',
- 'google-cloud-monitoring>=2.0.0',
- 'google-cloud-os-login>=2.0.0',
- 'google-cloud-orchestration-airflow>=1.0.0,<2.0.0',
- 'google-cloud-pubsub>=2.0.0',
- 'google-cloud-redis>=2.0.0',
- 'google-cloud-secret-manager>=0.2.0,<2.0.0',
- 'google-cloud-spanner>=1.10.0,<2.0.0',
- 'google-cloud-speech>=0.36.3,<2.0.0',
- 'google-cloud-storage>=1.30,<2.0.0',
- 'google-cloud-tasks>=2.0.0',
- 'google-cloud-texttospeech>=0.4.0,<2.0.0',
- 'google-cloud-translate>=1.5.0,<2.0.0',
- 'google-cloud-videointelligence>=1.7.0,<2.0.0',
- 'google-cloud-vision>=0.35.2,<2.0.0',
- 'google-cloud-workflows>=0.1.0,<2.0.0',
- 'grpcio-gcp>=0.2.2',
- 'httpx',
- 'json-merge-patch>=0.2',
- 'looker-sdk>=22.2.0',
- 'pandas-gbq',
- pandas_requirement,
- 'sqlalchemy-bigquery>=1.2.1',
- # A transient dependency of google-cloud-bigquery-datatransfer, but we
- # further constrain it since older versions are buggy.
- 'proto-plus>=1.19.6',
-]
-grpc = [
- # Google has very clear rules on what dependencies should be used. All the limits below
- # follow strict guidelines of Google Libraries as quoted here:
- # While this issue is open, dependents of google-api-core, google-cloud-core. and google-auth
- # should preserve >1, <3 pins on these packages.
- # https://github.com/googleapis/google-cloud-python/issues/10566
- 'google-auth>=1.0.0, <3.0.0',
- 'google-auth-httplib2>=0.0.1',
- 'grpcio>=1.15.0',
-]
-hashicorp = [
- 'hvac>=0.10',
-]
-hdfs = [
- 'snakebite-py3',
- 'hdfs[avro,dataframe,kerberos]>=2.0.4',
-]
-hive = [
- 'hmsclient>=0.1.0',
- 'pyhive[hive]>=0.6.0',
- # in case of Python 3.9 sasl library needs to be installed with version higher or equal than
- # 0.3.1 because only that version supports Python 3.9. For other Python version pyhive[hive] pulls
- # the sasl library anyway (and there sasl library version is not relevant)
- 'sasl>=0.3.1; python_version>="3.9"',
- 'thrift>=0.9.2',
- pandas_requirement,
-]
-http = [
- # The 2.26.0 release of requests got rid of the chardet LGPL mandatory dependency, allowing us to
- # release it as a requirement for airflow
- 'requests>=2.26.0',
-]
-http_provider = [
- 'apache-airflow-providers-http',
-]
-influxdb = [
- 'influxdb-client>=1.19.0',
- pandas_requirement,
-]
-jdbc = [
- 'jaydebeapi>=1.1.1',
-]
-jenkins = [
- 'python-jenkins>=1.0.0',
-]
-jira = [
- 'JIRA>1.0.7',
-]
kerberos = [
'pykerberos>=1.1.13',
'requests_kerberos>=0.10.0',
'thrift_sasl>=0.2.0',
]
kubernetes = [
- 'cryptography>=2.0.0',
# The Kubernetes API is known to introduce problems when upgraded to a MAJOR version. Airflow Core
# Uses Kubernetes for Kubernetes executor, and we also know that Kubernetes Python client follows SemVer
# (https://github.com/kubernetes-client/python#compatibility). This is a crucial component of Airflow
@@ -436,143 +282,38 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
# tested it, and we know it can be bumped. Bumping this version should also be connected with
# limiting minimum airflow version supported in cncf.kubernetes provider, due to the
# potential breaking changes in Airflow Core as well (kubernetes is added as extra, so Airflow
- # core is not hard-limited via install-requirements, only by extra).
+ # core is not hard-limited via install-requires, only by extra).
+ 'cryptography>=2.0.0',
'kubernetes>=21.7.0,<24',
]
-kylin = ['kylinpy>=2.6']
ldap = [
'ldap3>=2.5.1',
'python-ldap',
]
leveldb = ['plyvel; platform_machine != "aarch64"']
-mongo = [
- 'dnspython>=1.13.0',
- # pymongo 4.0.0 removes connection option `ssl_cert_reqs` which is used in providers-mongo/2.2.0
- # TODO: Upgrade to pymongo 4.0.0+
- 'pymongo>=3.6.0,<4.0.0',
-]
-mssql = [
- 'pymssql>=2.1.5; platform_machine != "aarch64"',
-]
-mysql = [
- 'mysql-connector-python>=8.0.11; platform_machine != "aarch64"',
- 'mysqlclient>=1.3.6; platform_machine != "aarch64"',
-]
-neo4j = ['neo4j>=4.2.1']
-odbc = [
- 'pyodbc',
-]
-opsgenie = [
- 'opsgenie-sdk>=2.1.5',
-]
-oracle = [
- 'oracledb>=1.0.0',
-]
-pagerduty = [
- 'pdpyras>=4.1.2',
-]
pandas = [
- pandas_requirement,
-]
-papermill = [
- 'papermill[all]>=1.2.1',
- 'scrapbook[all]',
+ 'pandas>=0.17.1',
]
password = [
'bcrypt>=2.0.0',
'flask-bcrypt>=0.7.1',
]
-pinot = [
- # pinotdb v0.1.1 may still work with older versions of Apache Pinot, but we've confirmed that it
- # causes a problem with newer versions.
- 'pinotdb>0.1.2',
-]
-plexus = [
- 'arrow>=0.16.0',
-]
-postgres = [
- 'psycopg2-binary>=2.7.4',
-]
-presto = [
- 'presto-python-client>=0.8.2',
- pandas_requirement,
-]
-psrp = [
- 'pypsrp>=0.8',
-]
-qubole = [
- 'qds-sdk>=1.10.4',
-]
rabbitmq = [
'amqp',
]
-redis = [
- # Redis 4 introduced a number of changes that likely need testing including mixins in redis commands
- # as well as unquoting URLS with `urllib.parse.unquote`:
- # https://github.com/redis/redis-py/blob/master/CHANGES
- # TODO: upgrade to support redis package >=4
- 'redis~=3.2',
-]
-salesforce = ['simple-salesforce>=1.0.0', 'tableauserverclient', pandas_requirement]
-samba = [
- 'smbprotocol>=1.5.0',
-]
-segment = [
- 'analytics-python>=1.2.9',
-]
-sendgrid = [
- 'sendgrid>=6.0.0',
-]
sentry = [
'blinker>=1.1',
'sentry-sdk>=0.8.0',
]
-singularity = ['spython>=0.0.56']
-slack = [
- 'slack_sdk>=3.0.0',
-]
-snowflake = [
- 'snowflake-connector-python>=2.4.1',
- 'snowflake-sqlalchemy>=1.1.0',
-]
-spark = [
- 'pyspark',
-]
-ssh = [
- 'paramiko>=2.6.0',
- 'sshtunnel>=0.3.2',
-]
statsd = [
'statsd>=3.3.0',
]
-tableau = [
- 'tableauserverclient',
-]
-telegram = [
- 'python-telegram-bot>=13.0',
-]
-trino = [
- 'trino>=0.301.0',
- pandas_requirement,
-]
-vertica = [
- 'vertica-python>=0.5.1',
-]
virtualenv = [
'virtualenv',
]
webhdfs = [
'hdfs[avro,dataframe,kerberos]>=2.0.4',
]
-winrm = [
- 'pywinrm>=0.4',
-]
-yandex = [
- 'yandexcloud>=0.146.0',
-]
-zendesk = [
- 'zenpy>=2.0.24',
-]
# End dependencies group
# Mypy 0.900 and above ships only with stubs from stdlib so if we need other stubs, we need to install them
@@ -658,105 +399,59 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
'yamllint',
]
-devel = cgroups + devel_only + doc + kubernetes + mypy_dependencies + mysql + pandas + password
-devel_hadoop = devel + hdfs + hive + kerberos + presto + webhdfs
-
-# Dict of all providers which are part of the Apache Airflow repository together with their requirements
-PROVIDERS_REQUIREMENTS: Dict[str, List[str]] = {
- 'airbyte': http_provider,
- 'alibaba': alibaba,
- 'amazon': amazon,
- 'apache.beam': apache_beam,
- 'apache.cassandra': cassandra,
- 'apache.drill': drill,
- 'apache.druid': druid,
- 'apache.hdfs': hdfs,
- 'apache.hive': hive,
- 'apache.kylin': kylin,
- 'apache.livy': http_provider,
- 'apache.pig': [],
- 'apache.pinot': pinot,
- 'apache.spark': spark,
- 'apache.sqoop': [],
- 'arangodb': arangodb,
- 'asana': asana,
- 'celery': celery,
- 'cloudant': cloudant,
- 'cncf.kubernetes': kubernetes,
- 'core.sql': [],
- 'databricks': databricks,
- 'datadog': datadog,
- 'dbt.cloud': http_provider,
- 'dingding': [],
- 'discord': [],
- 'docker': docker,
- 'elasticsearch': elasticsearch,
- 'exasol': exasol,
- 'facebook': facebook,
- 'ftp': [],
- 'github': github,
- 'google': google,
- 'grpc': grpc,
- 'hashicorp': hashicorp,
- 'http': http,
- 'imap': [],
- 'influxdb': influxdb,
- 'jdbc': jdbc,
- 'jenkins': jenkins,
- 'jira': jira,
- 'microsoft.azure': azure,
- 'microsoft.mssql': mssql,
- 'microsoft.psrp': psrp,
- 'microsoft.winrm': winrm,
- 'mongo': mongo,
- 'mysql': mysql,
- 'neo4j': neo4j,
- 'odbc': odbc,
- 'openfaas': [],
- 'opsgenie': opsgenie,
- 'oracle': oracle,
- 'pagerduty': pagerduty,
- 'papermill': papermill,
- 'plexus': plexus,
- 'postgres': postgres,
- 'presto': presto,
- 'qubole': qubole,
- 'redis': redis,
- 'salesforce': salesforce,
- 'samba': samba,
- 'segment': segment,
- 'sendgrid': sendgrid,
- 'sftp': ssh,
- 'singularity': singularity,
- 'slack': slack,
- 'snowflake': snowflake,
- 'sqlite': [],
- 'ssh': ssh,
- 'tableau': tableau,
- 'telegram': telegram,
- 'trino': trino,
- 'vertica': vertica,
- 'yandex': yandex,
- 'zendesk': zendesk,
-}
+
+def get_provider_dependencies(provider_name: str) -> List[str]:
+ return PROVIDER_DEPENDENCIES[provider_name][DEPS]
+
+
+def get_unique_dependency_list(req_list_iterable: Iterable[List[str]]):
+ _all_reqs: Set[str] = set()
+ for req_list in req_list_iterable:
+ for req in req_list:
+ _all_reqs.add(req)
+ return list(_all_reqs)
+
+
+devel = get_unique_dependency_list(
+ [
+ cgroups,
+ devel_only,
+ doc,
+ kubernetes,
+ mypy_dependencies,
+ get_provider_dependencies('mysql'),
+ pandas,
+ password,
+ ]
+)
+
+devel_hadoop = get_unique_dependency_list(
+ [
+ devel,
+ get_provider_dependencies('apache.hdfs'),
+ get_provider_dependencies('apache.hive'),
+ kerberos,
+ get_provider_dependencies('presto'),
+ webhdfs,
+ ]
+)
# Those are all additional extras which do not have their own 'providers'
# The 'apache.atlas' and 'apache.webhdfs' are extras that provide additional libraries
# but they do not have separate providers (yet?), they are merely there to add extra libraries
# That can be used in custom python/bash operators.
-ADDITIONAL_EXTRAS_REQUIREMENTS: Dict[str, List[str]] = {
+ADDITIONAL_EXTRAS_DEPENDENCIES: Dict[str, List[str]] = {
'apache.atlas': atlas,
'apache.webhdfs': webhdfs,
}
-
# Those are extras that are extensions of the 'core' Airflow. They provide additional features
# To airflow core. They do not have separate providers because they do not have any operators/hooks etc.
-CORE_EXTRAS_REQUIREMENTS: Dict[str, List[str]] = {
+CORE_EXTRAS_DEPENDENCIES: Dict[str, List[str]] = {
'async': async_packages,
- 'celery': celery, # also has provider, but it extends the core with the CeleryExecutor
+ 'celery': celery,
'cgroups': cgroups,
- 'cncf.kubernetes': kubernetes, # also has provider, but it extends the core with the KubernetesExecutor
+ 'cncf.kubernetes': kubernetes,
'dask': dask,
'deprecated_api': deprecated_api,
'github_enterprise': flask_appbuilder_oauth,
@@ -772,23 +467,17 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
'virtualenv': virtualenv,
}
-EXTRAS_REQUIREMENTS: Dict[str, List[str]] = deepcopy(CORE_EXTRAS_REQUIREMENTS)
+EXTRAS_DEPENDENCIES: Dict[str, List[str]] = deepcopy(CORE_EXTRAS_DEPENDENCIES)
def add_extras_for_all_providers() -> None:
- """
- Adds extras for all providers.
- By default all providers have the same extra name as provider id, for example
- 'apache.hive' extra has 'apache.hive' provider requirement.
- """
- for provider_name, provider_requirement in PROVIDERS_REQUIREMENTS.items():
- EXTRAS_REQUIREMENTS[provider_name] = provider_requirement
+ for (provider_name, provider_dict) in PROVIDER_DEPENDENCIES.items():
+ EXTRAS_DEPENDENCIES[provider_name] = provider_dict[DEPS]
def add_additional_extras() -> None:
- """Adds extras for all additional extras."""
- for extra_name, extra_requirement in ADDITIONAL_EXTRAS_REQUIREMENTS.items():
- EXTRAS_REQUIREMENTS[extra_name] = extra_requirement
+ for (extra_name, extra_dependencies) in ADDITIONAL_EXTRAS_DEPENDENCIES.items():
+ EXTRAS_DEPENDENCIES[extra_name] = extra_dependencies
add_extras_for_all_providers()
@@ -804,7 +493,7 @@ def add_additional_extras() -> None:
'aws': 'amazon',
'azure': 'microsoft.azure',
'cassandra': 'apache.cassandra',
- 'crypto': '', # All crypto requirements are installation requirements of core Airflow
+ 'crypto': '', # this is legacy extra - all dependencies are already "install-requires"
'druid': 'apache.druid',
'gcp': 'google',
'gcp_api': 'google',
@@ -830,19 +519,19 @@ def add_extras_for_all_deprecated_aliases() -> None:
"""
Add extras for all deprecated aliases. Requirements for those deprecated aliases are the same
as the extras they are replaced with.
- The requirements are not copies - those are the same lists as for the new extras. This is intended.
+ The dependencies are not copies - those are the same lists as for the new extras. This is intended.
Thanks to that if the original extras are later extended with providers, aliases are extended as well.
"""
for alias, extra in EXTRAS_DEPRECATED_ALIASES.items():
- requirements = EXTRAS_REQUIREMENTS.get(extra) if extra != '' else []
- if requirements is None:
+ dependencies = EXTRAS_DEPENDENCIES.get(extra) if extra != '' else []
+ if dependencies is None:
raise Exception(f"The extra {extra} is missing for deprecated alias {alias}")
- EXTRAS_REQUIREMENTS[alias] = requirements
+ EXTRAS_DEPENDENCIES[alias] = dependencies
def add_all_deprecated_provider_packages() -> None:
"""
- For deprecated aliases that are providers, we will swap the providers requirements to instead
+ For deprecated aliases that are providers, we will swap the providers dependencies to instead
be the provider itself.
e.g. {"kubernetes": ["kubernetes>=3.0.0, <12.0.0", ...]} becomes
@@ -851,7 +540,7 @@ def add_all_deprecated_provider_packages() -> None:
for alias, provider in EXTRAS_DEPRECATED_ALIASES.items():
if alias in EXTRAS_DEPRECATED_ALIASES_NOT_PROVIDERS:
continue
- replace_extra_requirement_with_provider_packages(alias, [provider])
+ replace_extra_dependencies_with_provider_packages(alias, [provider])
add_extras_for_all_deprecated_aliases()
@@ -862,7 +551,7 @@ def add_all_deprecated_provider_packages() -> None:
# This is list of all providers. It's a shortcut for anyone who would like to easily get list of
# All providers. It is used by pre-commits.
-ALL_PROVIDERS = list(PROVIDERS_REQUIREMENTS.keys())
+ALL_PROVIDERS = list(PROVIDER_DEPENDENCIES.keys())
ALL_DB_PROVIDERS = [
'apache.cassandra',
@@ -886,30 +575,35 @@ def add_all_deprecated_provider_packages() -> None:
'vertica',
]
-# Special requirements for all database-related providers. They are de-duplicated.
-all_dbs = list({req for db_provider in ALL_DB_PROVIDERS for req in PROVIDERS_REQUIREMENTS[db_provider]})
-# Requirements for all "user" extras (no devel). They are de-duplicated. Note that we do not need
-# to separately add providers requirements - they have been already added as 'providers' extras above
-_all_requirements = list({req for extras_reqs in EXTRAS_REQUIREMENTS.values() for req in extras_reqs})
+def get_all_db_dependencies() -> List[str]:
+ _all_db_reqs: Set[str] = set()
+ for provider in ALL_DB_PROVIDERS:
+ for req in PROVIDER_DEPENDENCIES[provider][DEPS]:
+ _all_db_reqs.add(req)
+ return list(_all_db_reqs)
-# All user extras here
-EXTRAS_REQUIREMENTS["all"] = _all_requirements
+
+# Special dependencies for all database-related providers. They are de-duplicated.
+all_dbs = get_all_db_dependencies()
# All db user extras here
-EXTRAS_REQUIREMENTS["all_dbs"] = all_dbs + pandas
+EXTRAS_DEPENDENCIES["all_dbs"] = all_dbs
+
+# Requirements for all "user" extras (no devel). They are de-duplicated. Note that we do not need
+# to separately add providers dependencies - they have been already added as 'providers' extras above
+_all_dependencies = get_unique_dependency_list(EXTRAS_DEPENDENCIES.values())
+
+# All user extras here
+EXTRAS_DEPENDENCIES["all"] = _all_dependencies
-# This can be simplified to devel_hadoop + _all_requirements due to inclusions
+# This can be simplified to devel_hadoop + _all_dependencies due to inclusions
# but we keep it for explicit sake. We are de-duplicating it anyway.
-devel_all = list(set(_all_requirements + doc + devel + devel_hadoop))
+devel_all = get_unique_dependency_list([_all_dependencies, doc, devel, devel_hadoop])
# Those are packages excluded for "all" dependencies
PACKAGES_EXCLUDED_FOR_ALL = []
-PACKAGES_EXCLUDED_FOR_ALL.extend(
- [
- 'snakebite',
- ]
-)
+PACKAGES_EXCLUDED_FOR_ALL.extend(['snakebite'])
def is_package_excluded(package: str, exclusion_list: List[str]) -> bool:
@@ -934,26 +628,27 @@ def is_package_excluded(package: str, exclusion_list: List[str]) -> bool:
# Those are extras that we have to add for development purposes
# They can be use to install some predefined set of dependencies.
-EXTRAS_REQUIREMENTS["doc"] = doc
-EXTRAS_REQUIREMENTS["devel"] = devel # devel already includes doc
-EXTRAS_REQUIREMENTS["devel_hadoop"] = devel_hadoop # devel_hadoop already includes devel
-EXTRAS_REQUIREMENTS["devel_all"] = devel_all
-EXTRAS_REQUIREMENTS["devel_ci"] = devel_ci
+EXTRAS_DEPENDENCIES["doc"] = doc
+EXTRAS_DEPENDENCIES["devel"] = devel # devel already includes doc
+EXTRAS_DEPENDENCIES["devel_hadoop"] = devel_hadoop # devel_hadoop already includes devel
+EXTRAS_DEPENDENCIES["devel_all"] = devel_all
+EXTRAS_DEPENDENCIES["devel_ci"] = devel_ci
-def sort_extras_requirements() -> Dict[str, List[str]]:
+def sort_extras_dependencies() -> Dict[str, List[str]]:
"""
The dictionary order remains when keys() are retrieved.
Sort both: extras and list of dependencies to make it easier to analyse problems
external packages will be first, then if providers are added they are added at the end of the lists.
"""
- sorted_requirements = dict(sorted(EXTRAS_REQUIREMENTS.items()))
- for extra_list in sorted_requirements.values():
- extra_list.sort()
- return sorted_requirements
+ sorted_dependencies: Dict[str, List[str]] = {}
+ sorted_extra_ids = sorted(EXTRAS_DEPENDENCIES.keys())
+ for extra_id in sorted_extra_ids:
+ sorted_dependencies[extra_id] = sorted(EXTRAS_DEPENDENCIES[extra_id])
+ return sorted_dependencies
-EXTRAS_REQUIREMENTS = sort_extras_requirements()
+EXTRAS_DEPENDENCIES = sort_extras_dependencies()
# Those providers are pre-installed always when airflow is installed.
# Those providers do not have dependency on airflow2.0 because that would lead to circular dependencies.
@@ -966,7 +661,7 @@ def sort_extras_requirements() -> Dict[str, List[str]]:
]
-def get_provider_package_from_package_id(package_id: str) -> str:
+def get_provider_package_name_from_package_id(package_id: str) -> str:
"""
Builds the name of provider package out of the package id provided/
@@ -986,8 +681,8 @@ def get_all_provider_packages() -> str:
"""Returns all provider packages configured in setup.py"""
excluded_providers = get_excluded_providers()
return " ".join(
- get_provider_package_from_package_id(package)
- for package in PROVIDERS_REQUIREMENTS
+ get_provider_package_name_from_package_id(package)
+ for package in ALL_PROVIDERS
if package not in excluded_providers
)
@@ -1013,27 +708,30 @@ def parse_config_files(self, *args, **kwargs) -> None:
]
provider_yaml_files = glob.glob("airflow/providers/**/provider.yaml", recursive=True)
for provider_yaml_file in provider_yaml_files:
- provider_relative_path = relpath(provider_yaml_file, os.path.join(my_dir, "airflow"))
+ provider_relative_path = relpath(provider_yaml_file, str(AIRFLOW_SOURCES_ROOT / "airflow"))
self.package_data['airflow'].append(provider_relative_path)
else:
self.install_requires.extend(
- [get_provider_package_from_package_id(package_id) for package_id in PREINSTALLED_PROVIDERS]
+ [
+ get_provider_package_name_from_package_id(package_id)
+ for package_id in PREINSTALLED_PROVIDERS
+ ]
)
-def replace_extra_requirement_with_provider_packages(extra: str, providers: List[str]) -> None:
+def replace_extra_dependencies_with_provider_packages(extra: str, providers: List[str]) -> None:
"""
- Replaces extra requirement with provider package. The intention here is that when
+ Replaces extra dependencies with provider package. The intention here is that when
the provider is added as dependency of extra, there is no need to add the dependencies
separately. This is not needed and even harmful, because in case of future versions of
- the provider, the requirements might change, so hard-coding requirements from the version
+ the provider, the dependencies might change, so hard-coding dependencies from the version
that was available at the release time might cause dependency conflicts in the future.
Say for example that you have salesforce provider with those deps:
{ 'salesforce': ['simple-salesforce>=1.0.0', 'tableauserverclient'] }
- Initially ['salesforce'] extra has those requirements and it works like that when you install
+ Initially ['salesforce'] extra has those dependencies, and it works like that when you install
it when INSTALL_PROVIDERS_FROM_SOURCES is set to `true` (during the development). However, when
the production installation is used, the dependencies are changed:
@@ -1042,7 +740,7 @@ def replace_extra_requirement_with_provider_packages(extra: str, providers: List
And then, 'apache-airflow-providers-salesforce' package has those 'install_requires' dependencies:
['simple-salesforce>=1.0.0', 'tableauserverclient']
- So transitively 'salesforce' extra has all the requirements it needs and in case the provider
+ So transitively 'salesforce' extra has all the dependencies it needs and in case the provider
changes its dependencies, they will transitively change as well.
In the constraint mechanism we save both - provider versions and their dependencies
@@ -1059,26 +757,26 @@ def replace_extra_requirement_with_provider_packages(extra: str, providers: List
:param providers: list of provider ids
"""
if extra in ['cncf.kubernetes', 'kubernetes', 'celery']:
- EXTRAS_REQUIREMENTS[extra].extend(
- [get_provider_package_from_package_id(package_name) for package_name in providers]
+ EXTRAS_DEPENDENCIES[extra].extend(
+ [get_provider_package_name_from_package_id(package_name) for package_name in providers]
)
else:
- EXTRAS_REQUIREMENTS[extra] = [
- get_provider_package_from_package_id(package_name) for package_name in providers
+ EXTRAS_DEPENDENCIES[extra] = [
+ get_provider_package_name_from_package_id(package_name) for package_name in providers
]
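The docstring's salesforce example can be shown end to end with a small, self-contained sketch; the helper and mapping below are simplified stand-ins for the real setup.py objects:

    from typing import Dict, List

    # Illustration only - this mirrors the salesforce example from the docstring,
    # it is not the real EXTRAS_DEPENDENCIES from setup.py.
    extras: Dict[str, List[str]] = {
        'salesforce': ['simple-salesforce>=1.0.0', 'tableauserverclient'],
    }

    def provider_package_name(provider_id: str) -> str:
        # Simplified stand-in for get_provider_package_name_from_package_id().
        return 'apache-airflow-providers-' + provider_id.replace('.', '-')

    # In a production (non-source) installation the extra stops hard-coding the
    # provider's own dependencies and points at the provider package instead,
    # so future changes to the provider's dependencies propagate transitively.
    extras['salesforce'] = [provider_package_name('salesforce')]

    assert extras == {'salesforce': ['apache-airflow-providers-salesforce']}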
-def add_provider_packages_to_extra_requirements(extra: str, providers: List[str]) -> None:
+def add_provider_packages_to_extra_dependencies(extra: str, providers: List[str]) -> None:
"""
- Adds provider packages as requirements to extra. This is used to add provider packages as requirements
- to the "bulk" kind of extras. Those bulk extras do not have the detailed 'extra' requirements as
+ Adds provider packages as dependencies to extra. This is used to add provider packages as dependencies
+ to the "bulk" kind of extras. Those bulk extras do not have the detailed 'extra' dependencies as
initial values, so instead of replacing them (see previous function) we can extend them.
:param extra: Name of the extra to add providers to
:param providers: list of provider ids
"""
- EXTRAS_REQUIREMENTS[extra].extend(
- [get_provider_package_from_package_id(package_name) for package_name in providers]
+ EXTRAS_DEPENDENCIES[extra].extend(
+ [get_provider_package_name_from_package_id(package_name) for package_name in providers]
)
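For contrast with the replacement helper, a short illustrative sketch of the extend behaviour used for the bulk extras (names and values below are made up):

    from typing import Dict, List

    # Illustration only - a made-up bulk extra with a couple of development dependencies.
    extras: Dict[str, List[str]] = {'devel_all': ['beautifulsoup4', 'black']}

    def provider_package_name(provider_id: str) -> str:
        # Simplified stand-in for get_provider_package_name_from_package_id().
        return 'apache-airflow-providers-' + provider_id.replace('.', '-')

    # Bulk extras keep their initial dependencies and are extended with the provider
    # packages, unlike per-provider extras, which are replaced outright.
    extras['devel_all'].extend(provider_package_name(p) for p in ['amazon', 'google'])

    assert extras['devel_all'] == [
        'beautifulsoup4',
        'black',
        'apache-airflow-providers-amazon',
        'apache-airflow-providers-google',
    ]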
@@ -1092,13 +790,13 @@ def add_all_provider_packages() -> None:
as the new provider is not yet in PyPI.
"""
- for provider in ALL_PROVIDERS:
- replace_extra_requirement_with_provider_packages(provider, [provider])
- add_provider_packages_to_extra_requirements("all", ALL_PROVIDERS)
- add_provider_packages_to_extra_requirements("devel_ci", ALL_PROVIDERS)
- add_provider_packages_to_extra_requirements("devel_all", ALL_PROVIDERS)
- add_provider_packages_to_extra_requirements("all_dbs", ALL_DB_PROVIDERS)
- add_provider_packages_to_extra_requirements(
+ for provider_id in ALL_PROVIDERS:
+ replace_extra_dependencies_with_provider_packages(provider_id, [provider_id])
+ add_provider_packages_to_extra_dependencies("all", ALL_PROVIDERS)
+ add_provider_packages_to_extra_dependencies("devel_ci", ALL_PROVIDERS)
+ add_provider_packages_to_extra_dependencies("devel_all", ALL_PROVIDERS)
+ add_provider_packages_to_extra_dependencies("all_dbs", ALL_DB_PROVIDERS)
+ add_provider_packages_to_extra_dependencies(
"devel_hadoop", ["apache.hdfs", "apache.hive", "presto", "trino"]
)
add_all_deprecated_provider_packages()
@@ -1164,7 +862,7 @@ def include_provider_namespace_packages_when_installing_from_sources() -> None:
setup(
distclass=AirflowDistribution,
version=version,
- extras_require=EXTRAS_REQUIREMENTS,
+ extras_require=EXTRAS_DEPENDENCIES,
download_url=('https://archive.apache.org/dist/airflow/' + version),
cmdclass={
'extra_clean': CleanCommand,
diff --git a/tests/build_provider_packages_dependencies.py b/tests/build_provider_packages_dependencies.py
deleted file mode 100644
index 14e3e3fca7d36..0000000000000
--- a/tests/build_provider_packages_dependencies.py
+++ /dev/null
@@ -1,280 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import json
-import os
-import sys
-from ast import Import, ImportFrom, NodeVisitor, parse
-from collections import defaultdict
-from os.path import dirname, sep
-from typing import Dict, List, Optional, Tuple
-
-from setup import PROVIDERS_REQUIREMENTS
-
-sys.path.append(os.path.join(dirname(__file__), os.pardir))
-
-
-AIRFLOW_PROVIDERS_FILE_PREFIX = f"airflow{sep}providers{sep}"
-AIRFLOW_TESTS_PROVIDERS_FILE_PREFIX = f"tests{sep}providers{sep}"
-AIRFLOW_PROVIDERS_IMPORT_PREFIX = "airflow.providers."
-
-# List of information messages generated
-infos: List[str] = []
-# List of warnings generated
-warnings: List[str] = []
-# list of errors generated
-errors: List[str] = []
-
-# store dependencies
-dependencies: Dict[str, List[str]] = defaultdict(list)
-
-
-def find_provider(provider_elements: List[str]) -> Optional[str]:
- """
- Finds provider name from the list of elements provided. It looks the providers up
- in PROVIDERS_REQUIREMENTS dict taken from the setup.py.
-
- :param provider_elements: array of elements of the path (split)
- :return: provider name or None if no provider could be found
- """
- provider = ""
- separator = ""
- provider_keys = PROVIDERS_REQUIREMENTS.keys()
- for element in provider_elements:
- provider = provider + separator + element
- if provider in provider_keys:
- return provider
- separator = "."
- return None
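The lookup above joins path elements one by one until the accumulated id matches a known provider. A tiny stand-alone sketch of the same idea, with hypothetical provider ids:

    from typing import List, Optional

    # Hypothetical provider ids - illustration only.
    provider_keys = {"apache.hive", "google"}

    def find_provider_id(provider_elements: List[str]) -> Optional[str]:
        # Join path elements with dots until the accumulated id matches a known provider.
        provider = ""
        separator = ""
        for element in provider_elements:
            provider = provider + separator + element
            if provider in provider_keys:
                return provider
            separator = "."
        return None

    assert find_provider_id(["apache", "hive", "hooks", "hive.py"]) == "apache.hive"
    assert find_provider_id(["unknown", "module.py"]) is None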
-
-
-def get_provider_from_file_name(file_name: str) -> Optional[str]:
- """
- Retrieves provider name from file name
- :param file_name: name of the file
- :return: provider name or None if no provider could be found
- """
- if (
- AIRFLOW_PROVIDERS_FILE_PREFIX not in file_name
- and AIRFLOW_TESTS_PROVIDERS_FILE_PREFIX not in file_name
- ):
- # We should only check files that are in the providers package
- errors.append(f"Wrong file not in the providers package = {file_name}")
- return None
- suffix = get_file_suffix(file_name)
- assert suffix
- split_path = suffix.split(sep)[2:]
- provider = find_provider(split_path)
- if not provider and file_name.endswith("__init__.py"):
- infos.append(f"Skipped file = {file_name}")
- elif not provider:
- warnings.append(f"Provider not found for path = {file_name}")
- return provider
-
-
-def get_file_suffix(file_name) -> Optional[str]:
- if AIRFLOW_PROVIDERS_FILE_PREFIX in file_name:
- return file_name[file_name.find(AIRFLOW_PROVIDERS_FILE_PREFIX) :]
- if AIRFLOW_TESTS_PROVIDERS_FILE_PREFIX in file_name:
- return file_name[file_name.find(AIRFLOW_TESTS_PROVIDERS_FILE_PREFIX) :]
- return None
-
-
-def get_provider_from_import(import_name: str) -> Optional[str]:
- """
- Retrieves provider name from the import name
- :param import_name: name of the import
- :return: provider name or None if no provider could be found
- """
- if AIRFLOW_PROVIDERS_IMPORT_PREFIX not in import_name:
- # skip silently - we expect non-providers imports
- return None
- suffix = import_name[import_name.find(AIRFLOW_PROVIDERS_IMPORT_PREFIX) :]
- split_import = suffix.split(".")[2:]
- provider = find_provider(split_import)
- if not provider:
- warnings.append(f"Provider not found for import = {import_name}")
- return provider
-
-
-class ImportFinder(NodeVisitor):
- """
- AST visitor that collects all imported names in its imports
- """
-
- def __init__(self, filename: str) -> None:
- self.imports: List[str] = []
- self.filename = filename
- self.handled_import_exception = List[str]
- self.tried_imports: List[str] = []
-
- def process_import(self, import_name: str) -> None:
- self.imports.append(import_name)
-
- def get_import_name_from_import_from(self, node: ImportFrom) -> List[str]:
- """
- Retrieves import name from the "from" import.
- :param node: ImportFrom name
- :return: import name
- """
- import_names: List[str] = []
- for alias in node.names:
- name = alias.name
- fullname = f'{node.module}.{name}' if node.module else name
- import_names.append(fullname)
- return import_names
-
- def visit_Import(self, node: Import):
- for alias in node.names:
- self.process_import(alias.name)
-
- def visit_ImportFrom(self, node: ImportFrom):
- if node.module == '__future__':
- return
- for fullname in self.get_import_name_from_import_from(node):
- self.process_import(fullname)
-
-
-def get_imports_from_file(file_name: str) -> List[str]:
- """
- Retrieves imports from file.
- :param file_name: name of the file
- :return: list of import names
- """
- try:
- with open(file_name, encoding="utf-8") as f:
- root = parse(f.read(), file_name)
- except Exception:
- print(f"Error when opening file {file_name}", file=sys.stderr)
- raise
- visitor = ImportFinder(file_name)
- visitor.visit(root)
- return visitor.imports
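The removed script collects imports by walking the AST rather than pattern-matching source lines. A compact stand-alone sketch of that technique (the sample source string is made up):

    import ast
    from typing import List

    # Made-up sample source - illustration only.
    source = "import json\nfrom os.path import join\n"

    class ImportCollector(ast.NodeVisitor):
        # Collects fully qualified names from both plain and "from" imports.
        def __init__(self) -> None:
            self.imports: List[str] = []

        def visit_Import(self, node: ast.Import) -> None:
            self.imports.extend(alias.name for alias in node.names)

        def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
            if node.module == "__future__":
                return
            self.imports.extend(f"{node.module}.{alias.name}" for alias in node.names)

    collector = ImportCollector()
    collector.visit(ast.parse(source))
    assert collector.imports == ["json", "os.path.join"]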
-
-
-def check_if_different_provider_used(file_name: str) -> None:
- file_provider = get_provider_from_file_name(file_name)
- if not file_provider:
- return
- imports = get_imports_from_file(file_name)
- for import_name in imports:
- import_provider = get_provider_from_import(import_name)
- if import_provider and file_provider != import_provider:
- dependencies[file_provider].append(import_provider)
-
-
-def parse_arguments() -> Tuple[str, str, str]:
- import argparse
-
- parser = argparse.ArgumentParser(
- description='Checks if dependencies between packages are handled correctly.'
- )
- parser.add_argument(
- "-f", "--provider-dependencies-file", help="Stores dependencies between providers in the file(.json)"
- )
- parser.add_argument(
- "-d", "--documentation-file", help="Updates package documentation in the file specified (.rst)"
- )
- parser.add_argument('files', nargs='*')
- args = parser.parse_args()
-
- if len(args.files) < 1:
- parser.print_usage()
- print()
- sys.exit(2)
- return args.files, args.provider_dependencies_file, args.documentation_file
-
-
-PREFIX = " "
-
-HEADER = """
-========================== ===========================
-Package Extras
-========================== ===========================
-"""
-FOOTER = """========================== ===========================
-
-"""
-
-
-def insert_documentation(deps_dict: Dict[str, List[str]], res: List[str]) -> None:
- res += HEADER.splitlines(keepends=True)
- for package, deps in deps_dict.items():
- deps_str = ",".join(deps)
- res.append(f"{package:27}{deps_str}\n")
- res += FOOTER.splitlines(keepends=True)
-
-
-if __name__ == '__main__':
- print()
- files, provider_dependencies_file_name, documentation_file_name = parse_arguments()
- num_files = 0
- for file in files:
- check_if_different_provider_used(file)
- num_files += 1
- print(f"Verified {num_files} files.")
- if infos:
- print("\nInformation messages:\n")
- for info in infos:
- print(PREFIX + info)
- print(f"Total: {len(infos)} information messages.")
- if warnings:
- print("\nWarnings!\n")
- for warning in warnings:
- print(PREFIX + warning)
- print(f"Total: {len(warnings)} warnings.")
- if errors:
- print("\nErrors!\n")
- for error in errors:
- print(PREFIX + error)
- print(f"Total: {len(errors)} errors.")
- unique_sorted_dependencies: Dict[str, List[str]] = {}
- for key in sorted(dependencies.keys()):
- unique_sorted_dependencies[key] = sorted(set(dependencies[key]))
- if provider_dependencies_file_name:
- with open(provider_dependencies_file_name, "w") as providers_file:
- json.dump(unique_sorted_dependencies, providers_file, indent=2)
- providers_file.write("\n")
- print()
- print(f"Written provider dependencies to the file {provider_dependencies_file_name}")
- print()
- if documentation_file_name:
- with open(documentation_file_name, encoding="utf-8") as documentation_file:
- text = documentation_file.readlines()
- replacing = False
- result: List[str] = []
- for line in text:
- if line.startswith(" .. START PACKAGE DEPENDENCIES HERE"):
- replacing = True
- result.append(line)
- insert_documentation(unique_sorted_dependencies, result)
- if line.startswith(" .. END PACKAGE DEPENDENCIES HERE"):
- replacing = False
- if not replacing:
- result.append(line)
- with open(documentation_file_name, "w", encoding="utf-8") as documentation_file:
- documentation_file.write("".join(result))
- print()
- print(f"Written package extras to the file {documentation_file_name}")
- print()
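The block above regenerates everything between the START/END PACKAGE DEPENDENCIES markers in the .rst file on each run. A minimal stand-alone sketch of that marker-based replacement (markers and content below are illustrative):

    from typing import List

    def replace_between_markers(lines: List[str], start: str, end: str, body: List[str]) -> List[str]:
        # Keep everything outside the markers; regenerate everything between them.
        result: List[str] = []
        replacing = False
        for line in lines:
            if line.startswith(start):
                replacing = True
                result.append(line)
                result.extend(body)
            if line.startswith(end):
                replacing = False
            if not replacing:
                result.append(line)
        return result

    doc = ["intro\n", ".. START\n", "stale\n", ".. END\n", "outro\n"]
    assert replace_between_markers(doc, ".. START", ".. END", ["fresh\n"]) == [
        "intro\n", ".. START\n", "fresh\n", ".. END\n", "outro\n",
    ]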
- if errors:
- print()
- print("ERROR! Errors found during verification. Exiting!")
- print()
- sys.exit(1)
- print()
- print("Verification complete! Success!")
- print()