Skip to content

Commit

Permalink
Adds automated installation of dependent packages (#11526)
Browse files Browse the repository at this point in the history
When extras are specifying when airflow is installed, this one triggers
installation of dependent packages. Each extra has a set of provider
packages that are needed by the extra and they will be installed
automatically if this extra is specified.

For now we do not add any version specificatiion, until we agree the
process in #11425 and then we should be able to implement an
automated way of getting information about cross-package
version dependencies.

Fixes: #11464
  • Loading branch information
potiuk authored Nov 9, 2020
1 parent a7272f4 commit ea27f90
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 17 deletions.
6 changes: 3 additions & 3 deletions CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -546,9 +546,9 @@ aws, azure, cassandra, celery, cgroups, cloudant, cncf.kubernetes, dask, databri
devel_hadoop, doc, docker, druid, elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise,
google, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap,
microsoft.azure, microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty,
papermill, password, pinot, plexus, postgres, presto, qds, rabbitmq, redis, salesforce, samba,
segment, sendgrid, sentry, singularity, slack, snowflake, spark, ssh, statsd, tableau, vertica,
virtualenv, webhdfs, winrm, yandexcloud, all, devel_ci
papermill, password, pinot, plexus, postgres, presto, qds, qubole, rabbitmq, redis, salesforce,
samba, segment, sendgrid, sentry, singularity, slack, snowflake, spark, ssh, statsd, tableau,
vertica, virtualenv, webhdfs, winrm, yandexcloud, all, devel_ci

.. END EXTRAS HERE
Expand Down
6 changes: 3 additions & 3 deletions INSTALL
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ aws, azure, cassandra, celery, cgroups, cloudant, cncf.kubernetes, dask, databri
devel_hadoop, doc, docker, druid, elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise,
google, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap,
microsoft.azure, microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty,
papermill, password, pinot, plexus, postgres, presto, qds, rabbitmq, redis, salesforce, samba,
segment, sendgrid, sentry, singularity, slack, snowflake, spark, ssh, statsd, tableau, vertica,
virtualenv, webhdfs, winrm, yandexcloud, all, devel_ci
papermill, password, pinot, plexus, postgres, presto, qds, qubole, rabbitmq, redis, salesforce,
samba, segment, sendgrid, sentry, singularity, slack, snowflake, spark, ssh, statsd, tableau,
vertica, virtualenv, webhdfs, winrm, yandexcloud, all, devel_ci

# END EXTRAS HERE

Expand Down
49 changes: 43 additions & 6 deletions scripts/ci/pre_commit/pre_commit_check_order_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
"""
Test for an order of dependencies in setup.py
"""

import os
import re
import sys
Expand All @@ -28,6 +27,10 @@

errors = []

MY_DIR_PATH = os.path.dirname(__file__)
SOURCE_DIR_PATH = os.path.abspath(os.path.join(MY_DIR_PATH, os.pardir, os.pardir, os.pardir))
sys.path.insert(0, SOURCE_DIR_PATH)


def _check_list_sorted(the_list: List[str], message: str) -> None:
sorted_list = sorted(the_list)
Expand Down Expand Up @@ -122,9 +125,7 @@ def check_extras_require(setup_context: str) -> None:
Test for an order of dependencies in function do_setup section
extras_require in setup.py
"""
pattern_extras_requires = re.compile(
r'EXTRAS_REQUIREMENTS: Dict\[str, Iterable\[str\]] = {(.*?)}', re.DOTALL
)
pattern_extras_requires = re.compile(r'EXTRAS_REQUIREMENTS: Dict\[str, List\[str\]] = {(.*?)}', re.DOTALL)
extras_requires = pattern_extras_requires.findall(setup_context)[0]

pattern_dependent = re.compile('\'(.*?)\'')
Expand All @@ -137,16 +138,50 @@ def check_provider_requirements(setup_context: str) -> None:
Test for an order of dependencies in function do_setup section
providers_require in setup.py
"""
pattern_extras_requires = re.compile(
pattern_extras_providers_packages = re.compile(
r'PROVIDERS_REQUIREMENTS: Dict\[str, Iterable\[str\]\] = {(.*?)}', re.DOTALL
)
extras_requires = pattern_extras_requires.findall(setup_context)[0]
extras_requires = pattern_extras_providers_packages.findall(setup_context)[0]

pattern_dependent = re.compile('"(.*?)"')
src = pattern_dependent.findall(extras_requires)
_check_list_sorted(src, "Order of dependencies in: providers_require")


def check_extras_provider_packages(setup_context: str) -> None:
"""
Test for an order of dependencies in function do_setup section
providers_require in setup.py
"""
pattern_extras_requires = re.compile(
r'EXTRAS_PROVIDERS_PACKAGES: Dict\[str, Iterable\[str\]\] = {(.*?)}', re.DOTALL
)
extras_requires = pattern_extras_requires.findall(setup_context)[0]

pattern_dependent = re.compile('"(.*?)":')
src = pattern_dependent.findall(extras_requires)
_check_list_sorted(src, "Order of dependencies in: extras_provider_packages")


def checks_extra_with_providers_exist() -> None:

from setup import EXTRAS_REQUIREMENTS, EXTRAS_PROVIDERS_PACKAGES # noqa # isort:skip

message = 'Check if all extras have providers defined in: EXTRAS_PROVIDERS_PACKAGES'
local_error = False
for key in EXTRAS_REQUIREMENTS.keys(): # noqa
if key not in EXTRAS_PROVIDERS_PACKAGES.keys(): # noqa
if not local_error:
local_error = True
print(f"Extra {key} NOK")
errors.append(
f"ERROR in {message}. The {key} extras is missing there."
" If you do not want to install any providers with this extra set it to []"
)
if not local_error:
print(f"{message} is ok")


if __name__ == '__main__':
setup_context_main = setup()
check_main_dependent_group(setup_context_main)
Expand All @@ -155,6 +190,8 @@ def check_provider_requirements(setup_context: str) -> None:
check_install_and_setup_requires(setup_context_main)
check_extras_require(setup_context_main)
check_provider_requirements(setup_context_main)
check_extras_provider_packages(setup_context_main)
checks_extra_with_providers_exist()

print()
print()
Expand Down
2 changes: 1 addition & 1 deletion scripts/in_container/run_prepare_provider_packages.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ LIST_OF_DIRS_FILE=$(mktemp)

cd "${AIRFLOW_SOURCES}/airflow/providers" || exit 1

find . -type d | sed 's/.\///; s/\//\./g' | grep -E 'hooks|operators|sensors|secrets' \
find . -type d | sed 's/.\///; s/\//\./g' | grep -E 'hooks|operators|sensors|secrets|utils' \
> "${LIST_OF_DIRS_FILE}"

cd "${AIRFLOW_SOURCES}/provider_packages" || exit 1
Expand Down
126 changes: 122 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
'psycopg2-binary>=2.7.4',
]
presto = ['presto-python-client>=0.7.0,<0.8']
qds = [
qubole = [
'qds-sdk>=1.10.4',
]
rabbitmq = [
Expand Down Expand Up @@ -540,11 +540,12 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
"plexus": plexus,
"postgres": postgres,
"presto": presto,
"qubole": qds,
"qubole": qubole,
"redis": redis,
"salesforce": salesforce,
"samba": samba,
"segment": segment,
"sendgrid": sendgrid,
"sftp": ssh,
"singularity": singularity,
"slack": slack,
Expand All @@ -556,7 +557,7 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
"zendesk": zendesk,
}

EXTRAS_REQUIREMENTS: Dict[str, Iterable[str]] = {
EXTRAS_REQUIREMENTS: Dict[str, List[str]] = {
'all_dbs': all_dbs,
'amazon': amazon,
'apache.atlas': atlas,
Expand Down Expand Up @@ -619,7 +620,8 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
'plexus': plexus,
'postgres': postgres,
'presto': presto,
'qds': qds,
'qds': qubole, # TODO: remove this in Airflow 2.1
'qubole': qubole,
'rabbitmq': rabbitmq,
'redis': redis,
'salesforce': salesforce,
Expand All @@ -641,6 +643,111 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
'yandexcloud': yandexcloud,
}

EXTRAS_PROVIDERS_PACKAGES: Dict[str, Iterable[str]] = {
'all': list(PROVIDERS_REQUIREMENTS.keys()),
# this is not 100% accurate with devel_ci definition, but we really want to have all providers
# when devel_ci extra is installed!
'devel_ci': list(PROVIDERS_REQUIREMENTS.keys()),
'all_dbs': [
"apache.cassandra",
"apache.druid",
"apache.hdfs",
"apache.hive",
"apache.pinot",
"cloudant",
"exasol",
"mongo",
"microsoft.mssql",
"mysql",
"postgres",
"presto",
"vertica",
],
'amazon': ["amazon"],
'apache.atlas': [],
'apache.beam': [],
"apache.cassandra": ["apache.cassandra"],
"apache.druid": ["apache.druid"],
"apache.hdfs": ["apache.hdfs"],
"apache.hive": ["apache.hive"],
"apache.kylin": ["apache.kylin"],
"apache.pinot": ["apache.pinot"],
"apache.presto": ["apache.presto"],
"apache.spark": ["apache.spark"],
"apache.webhdfs": ["apache.hdfs"],
'async': [],
'atlas': [], # TODO: remove this in Airflow 2.1
'aws': ["amazon"], # TODO: remove this in Airflow 2.1
'azure': ["microsoft.azure"], # TODO: remove this in Airflow 2.1
'cassandra': ["apache.cassandra"], # TODO: remove this in Airflow 2.1
'celery': ["celery"],
'cgroups': [],
'cloudant': ["cloudant"],
'cncf.kubernetes': ["cncf.kubernetes"],
'dask': ["dask"],
'databricks': ["databricks"],
'datadog': ["datadog"],
'devel': ["cncf.kubernetes", "mysql"],
'devel_hadoop': ["apache.hdfs", "apache.hive", "presto"],
'doc': [],
'docker': ["docker"],
'druid': ["apache.druid"], # TODO: remove this in Airflow 2.1
'elasticsearch': ["elasticsearch"],
'exasol': ["exasol"],
'facebook': ["facebook"],
'gcp': ["google"], # TODO: remove this in Airflow 2.1
'gcp_api': ["google"], # TODO: remove this in Airflow 2.1
'github_enterprise': [],
'google': ["google"],
'google_auth': [],
'grpc': ["grpc"],
'hashicorp': ["hashicorp"],
'hdfs': ["apache.hdfs"], # TODO: remove this in Airflow 2.1
'hive': ["apache.hive"], # TODO: remove this in Airflow 2.1
'jdbc': ["jdbc"],
'jira': ["jira"],
'kerberos': [],
'kubernetes': ["cncf.kubernetes"], # TODO: remove this in Airflow 2.1
'ldap': [],
"microsoft.azure": ["microsoft.azure"],
"microsoft.mssql": ["microsoft.mssql"],
"microsoft.winrm": ["microsoft.winrm"],
'mongo': ["mongo"],
'mssql': ["microsoft.mssql"], # TODO: remove this in Airflow 2.1
'mysql': ["microsoft.mssql"],
'odbc': ["odbc"],
'oracle': ["oracle"],
'pagerduty': ["pagerduty"],
'papermill': ["papermill"],
'password': [],
'pinot': ["apache.pinot"], # TODO: remove this in Airflow 2.1
'plexus': ["plexus"],
'postgres': ["postgres"],
'presto': ["presto"],
'qds': ["qubole"], # TODO: remove this in Airflow 2.1
'qubole': ["qubole"],
'rabbitmq': ["rabbitmq"],
'redis': ["redis"],
'salesforce': ["salesforce"],
'samba': ["samba"],
'segment': ["segment"],
'sendgrid': ["sendgrid"],
'sentry': ["sentry"],
'singularity': ["singularity"],
'slack': ["slack"],
'snowflake': ["snowflake"],
'spark': ["spark"],
'ssh': ["ssh"],
'statsd': ["statsd"],
'tableau': ["tableau"],
'vertica': ["vertica"],
'virtualenv': ["virtualenv"],
'webhdfs': ["apache.hdfs"], # TODO: remove this in Airflow 2.1
'winrm': ["microsoft.winrm"], # TODO: remove this in Airflow 2.1
'yandexcloud': ["yandexcloud"],
}


# Make devel_all contain all providers + extras + unique
devel_all = list(
set(
Expand Down Expand Up @@ -759,6 +866,17 @@ def is_package_excluded(package: str, exclusion_list: List[str]):
]


def get_provider_package_from_package_id(package_id: str):
"""
Builds the name of provider package out of the package id provided/
:param package_id: id of the package (like amazon or microsoft.azure)
:return: full name of package in PyPI
"""
package_suffix = package_id.replace(".", "-")
return f"apache-airflow-providers-{package_suffix}"


def do_setup():
"""Perform the Airflow package setup."""
install_providers_from_sources = os.getenv('INSTALL_PROVIDERS_FROM_SOURCES')
Expand Down

0 comments on commit ea27f90

Please sign in to comment.