Skip to content

Commit

Permalink
Fix reproducibility of prepared provider packages (fix flit frontend)
Browse files Browse the repository at this point in the history
After some checks it turned out that reproducibility of produced
packages depends not only on the build backend configured for the
project but also on the build front-end used - because the front-end is
the one that modifies metadata in prepared packages - including the build
tool used, its version and the metadata version supported by the front-end.

That's why in order to maintain reproducibility for anyone who builds
the packages, we have to pin not only the build backend in
pyproject.toml (flit-core) but also the build frontend used (flit).

Since package preparation is done with breeze, we can do it by
pinning flit (and just in case also flit-core) so that anyone who
builds a specific version of the package will use exactly the same flit
as the person who built the original packages.

This way we will avoid reproducibility problems experienced with 1.5.0
release of FAB.
  • Loading branch information
potiuk committed Nov 5, 2024
1 parent 90b9847 commit 0dcf4ce
Show file tree
Hide file tree
Showing 9 changed files with 20 additions and 33 deletions.
1 change: 0 additions & 1 deletion dev/README_RELEASE_PROVIDER_PACKAGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,6 @@ export AIRFLOW_REPO_ROOT=$(pwd -P)
rm -rf ${AIRFLOW_REPO_ROOT}/dist/*
```


* Release candidate packages:

```shell script
Expand Down
2 changes: 1 addition & 1 deletion dev/breeze/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT.

---------------------------------------------------------------------------------------------------------

Package config hash: f13c42703e0a262d9f3c1bee608ff32c368be4c6a11f150a2f95809938641f5ec07904d5cc2e3944dfe4d206dc52846f8b81193fc279a333ff898dd033e07be4
Package config hash: 5d32e2c150de1cc22d522f94d86500c739f434439ae064e35791ac795932a1f21509c3d0fcf9f2351e7901a32601190bd4cb640799620a44d0ff6d9244aef57c

---------------------------------------------------------------------------------------------------------
15 changes: 14 additions & 1 deletion dev/breeze/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,20 @@ dependencies = [
"black>=23.11.0",
"click>=8.1.7",
"filelock>=3.13.0",
"flit>=3.5.0",
#
# We pin flit in order to make sure reproducibility of provider packages is maintained.
# It turns out that when packages are prepared, the metadata version in the produced packages
# is taken from the front-end, not from the backend, so in order to make sure that the
# packages are reproducible, we should pin both the backend in "build-system" and the frontend in
# "dependencies" of the environment that is used to build the packages.
#
# TODO(potiuk): automate bumping the version of flit in breeze and sync it with
# the version in the template for provider packages with pre-commit. Also add instructions in
# the source packages explaining that reproducibility can only be achieved by using the same
# version of flit front-end to build the package.
#
"flit==3.10.1",
"flit-core==3.10.1",
"google-api-python-client>=2.142.0",
"google-auth-httplib2>=0.2.0",
"google-auth-oauthlib>=1.2.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from airflow_breeze.utils.confirm import confirm_action
from airflow_breeze.utils.console import console_print
from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT, DIST_DIR, OUT_DIR
from airflow_breeze.utils.python_versions import check_python_version
from airflow_breeze.utils.reproducible import get_source_date_epoch, repack_deterministically
from airflow_breeze.utils.run_utils import run_command

Expand Down Expand Up @@ -341,7 +340,6 @@ def remove_old_releases(version, repo_root):
"--version", required=True, help="The release candidate version e.g. 2.4.3rc1", envvar="VERSION"
)
def prepare_airflow_tarball(version: str):
check_python_version()
from packaging.version import Version

airflow_version = Version(version)
Expand All @@ -367,7 +365,6 @@ def prepare_airflow_tarball(version: str):
)
@option_answer
def publish_release_candidate(version, previous_version, github_token):
check_python_version()
from packaging.version import Version

airflow_version = Version(version)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@
generate_providers_metadata_for_package,
get_related_providers,
)
from airflow_breeze.utils.python_versions import check_python_version, get_python_version_list
from airflow_breeze.utils.python_versions import get_python_version_list
from airflow_breeze.utils.reproducible import get_source_date_epoch, repack_deterministically
from airflow_breeze.utils.run_utils import (
run_command,
Expand Down Expand Up @@ -530,7 +530,6 @@ def prepare_airflow_packages(
version_suffix_for_pypi: str,
use_local_hatch: bool,
):
check_python_version()
perform_environment_checks()
fix_ownership_using_docker()
cleanup_python_generated_files()
Expand Down Expand Up @@ -576,7 +575,6 @@ def prepare_airflow_task_sdk_packages(
package_format: str,
use_local_hatch: bool,
):
check_python_version()
perform_environment_checks()
fix_ownership_using_docker()
cleanup_python_generated_files()
Expand Down Expand Up @@ -907,7 +905,6 @@ def prepare_provider_packages(
skip_tag_check: bool,
version_suffix_for_pypi: str,
):
check_python_version(release_provider_packages=True)
perform_environment_checks()
fix_ownership_using_docker()
cleanup_python_generated_files()
Expand Down Expand Up @@ -3182,7 +3179,6 @@ def prepare_helm_chart_tarball(
) -> None:
import yaml

check_python_version()
chart_yaml_file_content = CHART_YAML_FILE.read_text()
chart_yaml_dict = yaml.safe_load(chart_yaml_file_content)
version_in_chart = chart_yaml_dict["version"]
Expand Down Expand Up @@ -3324,8 +3320,6 @@ def prepare_helm_chart_tarball(
@option_dry_run
@option_verbose
def prepare_helm_chart_package(sign_email: str):
check_python_version()

import yaml

from airflow_breeze.utils.kubernetes_utils import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,8 @@

# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE
# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY
#
[build-system]
requires = ["flit_core==3.10.0"]
requires = ["flit_core==3.10.1"]
build-backend = "flit_core.buildapi"

[project]
Expand Down
14 changes: 0 additions & 14 deletions dev/breeze/src/airflow_breeze/utils/python_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,3 @@ def get_python_version_list(python_versions: str) -> list[str]:
)
sys.exit(1)
return python_version_list


def check_python_version(release_provider_packages: bool = False):
if not sys.version_info < (3, 12) and release_provider_packages:
get_console().print("[error]Python 3.12 is not supported.\n")
get_console().print(
"[warning]Please reinstall Breeze using Python 3.9 - 3.11 environment because not all "
"provider packages support Python 3.12 yet.[/]\n\n"
"If you are using uv:\n\n"
" uv tool install --force --reinstall --python 3.9 -e ./dev/breeze\n\n"
"If you are using pipx:\n\n"
" pipx install --python $(which python3.9) --force -e ./dev/breeze\n"
)
sys.exit(1)
3 changes: 0 additions & 3 deletions dev/breeze/src/airflow_breeze/utils/reproducible.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
from subprocess import CalledProcessError, CompletedProcess

from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT, OUT_DIR, REPRODUCIBLE_DIR
from airflow_breeze.utils.python_versions import check_python_version
from airflow_breeze.utils.run_utils import run_command


Expand Down Expand Up @@ -91,7 +90,6 @@ def reset(tarinfo):
tarinfo.mtime = timestamp
return tarinfo

check_python_version()
OUT_DIR.mkdir(exist_ok=True)
shutil.rmtree(REPRODUCIBLE_DIR, ignore_errors=True)
REPRODUCIBLE_DIR.mkdir(exist_ok=True)
Expand Down Expand Up @@ -149,7 +147,6 @@ def reset(tarinfo):


def main():
check_python_version()
parser = ArgumentParser()
parser.add_argument("-a", "--archive", help="archive to repack")
parser.add_argument("-o", "--out", help="archive destination")
Expand Down
4 changes: 3 additions & 1 deletion dev/breeze/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0dcf4ce

Please sign in to comment.