diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b80e72..b2e5eca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - App dependencies are now installed into a virtual environment instead of user site-packages. ([#257](https://github.com/heroku/buildpacks-python/pull/257)) +- pip is now installed into its own layer (as a user site-packages install) instead of into system site-packages in the Python layer. ([#258](https://github.com/heroku/buildpacks-python/pull/258)) ## [0.15.0] - 2024-08-07 diff --git a/src/errors.rs b/src/errors.rs index 0875dab..de9d602 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,4 +1,5 @@ use crate::django::DjangoCollectstaticError; +use crate::layers::pip::PipLayerError; use crate::layers::pip_dependencies::PipDependenciesLayerError; use crate::layers::python::PythonLayerError; use crate::package_manager::DeterminePackageManagerError; @@ -46,6 +47,7 @@ fn on_buildpack_error(error: BuildpackError) { BuildpackError::DjangoCollectstatic(error) => on_django_collectstatic_error(error), BuildpackError::DjangoDetection(error) => on_django_detection_error(&error), BuildpackError::PipDependenciesLayer(error) => on_pip_dependencies_layer_error(error), + BuildpackError::PipLayer(error) => on_pip_layer_error(error), BuildpackError::PythonLayer(error) => on_python_layer_error(error), BuildpackError::PythonVersion(error) => on_python_version_error(error), }; @@ -126,28 +128,6 @@ fn on_python_version_error(error: PythonVersionError) { fn on_python_layer_error(error: PythonLayerError) { match error { - PythonLayerError::BootstrapPipCommand(error) => match error { - StreamedCommandError::Io(io_error) => log_io_error( - "Unable to bootstrap pip", - "running the command to install pip", - &io_error, - ), - StreamedCommandError::NonZeroExitStatus(exit_status) => log_error( - "Unable to bootstrap pip", - formatdoc! 
{" - The command to install pip did not exit successfully ({exit_status}). - - See the log output above for more information. - - In some cases, this happens due to an unstable network connection. - Please try again to see if the error resolves itself. - - If that does not help, check the status of PyPI (the upstream Python - package repository service), here: - https://status.python.org - "}, - ), - }, PythonLayerError::DownloadUnpackPythonArchive(error) => match error { DownloadUnpackArchiveError::Request(ureq_error) => log_error( "Unable to download Python", @@ -166,11 +146,6 @@ fn on_python_layer_error(error: PythonLayerError) { &io_error, ), }, - PythonLayerError::LocateBundledPip(io_error) => log_io_error( - "Unable to locate the bundled copy of pip", - "locating the pip wheel file bundled inside the Python 'ensurepip' module", - &io_error, - ), // This error will change once the Python version is validated against a manifest. // TODO: (W-12613425) Write the supported Python versions inline, instead of linking out to Dev Center. // TODO: Decide how to explain to users how stacks, base images and builder images versions relate to each other. @@ -189,6 +164,38 @@ fn on_python_layer_error(error: PythonLayerError) { }; } +fn on_pip_layer_error(error: PipLayerError) { + match error { + PipLayerError::InstallPipCommand(error) => match error { + StreamedCommandError::Io(io_error) => log_io_error( + "Unable to install pip", + "running 'python' to install pip", + &io_error, + ), + StreamedCommandError::NonZeroExitStatus(exit_status) => log_error( + "Unable to install pip", + formatdoc! {" + The command to install pip did not exit successfully ({exit_status}). + + See the log output above for more information. + + In some cases, this happens due to an unstable network connection. + Please try again to see if the error resolves itself. 
+ + If that does not help, check the status of PyPI (the upstream Python + package repository service), here: + https://status.python.org + "}, + ), + }, + PipLayerError::LocateBundledPip(io_error) => log_io_error( + "Unable to locate the bundled copy of pip", + "locating the pip wheel file bundled inside the Python 'ensurepip' module", + &io_error, + ), + }; +} + fn on_pip_dependencies_layer_error(error: PipDependenciesLayerError) { match error { PipDependenciesLayerError::CreateVenvCommand(error) => match error { @@ -210,7 +217,7 @@ fn on_pip_dependencies_layer_error(error: PipDependenciesLayerError) { PipDependenciesLayerError::PipInstallCommand(error) => match error { StreamedCommandError::Io(io_error) => log_io_error( "Unable to install dependencies using pip", - "running the 'pip install' command to install the application's dependencies", + "running 'pip install' to install the app's dependencies", &io_error, ), // TODO: Add more suggestions here as to causes (eg network, invalid requirements.txt, diff --git a/src/layers/mod.rs b/src/layers/mod.rs index 74c1faa..ab0a91e 100644 --- a/src/layers/mod.rs +++ b/src/layers/mod.rs @@ -1,3 +1,4 @@ +pub(crate) mod pip; pub(crate) mod pip_cache; pub(crate) mod pip_dependencies; pub(crate) mod python; diff --git a/src/layers/pip.rs b/src/layers/pip.rs new file mode 100644 index 0000000..a828621 --- /dev/null +++ b/src/layers/pip.rs @@ -0,0 +1,143 @@ +use crate::packaging_tool_versions::PIP_VERSION; +use crate::python_version::PythonVersion; +use crate::utils::StreamedCommandError; +use crate::{utils, BuildpackError, PythonBuildpack}; +use libcnb::build::BuildContext; +use libcnb::data::layer_name; +use libcnb::layer::{ + CachedLayerDefinition, EmptyLayerCause, InvalidMetadataAction, LayerState, RestoredLayerAction, +}; +use libcnb::layer_env::{LayerEnv, ModificationBehavior, Scope}; +use libcnb::Env; +use libherokubuildpack::log::log_info; +use serde::{Deserialize, Serialize}; +use std::io; +use std::path::Path; +use 
std::process::Command; + +/// Creates a layer containing pip. +pub(crate) fn install_pip( + context: &BuildContext<PythonBuildpack>, + env: &mut Env, + python_version: &PythonVersion, + python_layer_path: &Path, +) -> Result<(), libcnb::Error<BuildpackError>> { + let new_metadata = PipLayerMetadata { + python_version: python_version.to_string(), + pip_version: PIP_VERSION.to_string(), + }; + + let layer = context.cached_layer( + layer_name!("pip"), + CachedLayerDefinition { + build: true, + launch: true, + invalid_metadata_action: &|_| InvalidMetadataAction::DeleteLayer, + restored_layer_action: &|cached_metadata: &PipLayerMetadata, _| { + let cached_pip_version = cached_metadata.pip_version.clone(); + if cached_metadata == &new_metadata { + (RestoredLayerAction::KeepLayer, cached_pip_version) + } else { + (RestoredLayerAction::DeleteLayer, cached_pip_version) + } + }, + }, + )?; + + let mut layer_env = LayerEnv::new() + // We use a curated pip version, so disable the update check to speed up pip invocations, + // reduce build log spam and prevent users from thinking they need to manually upgrade. + // https://pip.pypa.io/en/stable/cli/pip/#cmdoption-disable-pip-version-check + .chainable_insert( + Scope::All, + ModificationBehavior::Override, + "PIP_DISABLE_PIP_VERSION_CHECK", + "1", + ) + // Move the Python user base directory to this layer instead of under HOME: + // https://docs.python.org/3/using/cmdline.html#envvar-PYTHONUSERBASE + .chainable_insert( + Scope::All, + ModificationBehavior::Override, + "PYTHONUSERBASE", + layer.path(), + ); + + match layer.state { + LayerState::Restored { + cause: ref cached_pip_version, + } => { + log_info(format!("Using cached pip {cached_pip_version}")); + } + LayerState::Empty { ref cause } => { + match cause { + EmptyLayerCause::InvalidMetadataAction { .. 
} => { + log_info("Discarding cached pip since its layer metadata can't be parsed"); + } + EmptyLayerCause::RestoredLayerAction { + cause: cached_pip_version, + } => { + log_info(format!("Discarding cached pip {cached_pip_version}")); + } + EmptyLayerCause::NewlyCreated => {} + } + + log_info(format!("Installing pip {PIP_VERSION}")); + + // We use the pip wheel bundled within Python's standard library to install our chosen + // pip version, since it's faster than `ensurepip` followed by an upgrade in place. + let bundled_pip_module_path = + utils::bundled_pip_module_path(python_layer_path, python_version) + .map_err(PipLayerError::LocateBundledPip)?; + + utils::run_command_and_stream_output( + Command::new("python") + .args([ + &bundled_pip_module_path.to_string_lossy(), + "install", + // There is no point using pip's cache here, since the layer itself will be cached. + "--no-cache-dir", + "--no-input", + "--no-warn-script-location", + "--quiet", + "--user", + format!("pip=={PIP_VERSION}").as_str(), + ]) + .env_clear() + .envs(&layer_env.apply(Scope::Build, env)), + ) + .map_err(PipLayerError::InstallPipCommand)?; + + layer.write_metadata(new_metadata)?; + } + } + + layer.write_env(&layer_env)?; + // Required to pick up the automatic PATH env var. See: https://github.com/heroku/libcnb.rs/issues/842 + layer_env = layer.read_env()?; + env.clone_from(&layer_env.apply(Scope::Build, env)); + + Ok(()) +} + +// pip's wheel is a pure Python package with no dependencies, so the layer is not arch or distro +// specific. However, the generated .pyc files vary by Python version. +#[derive(Deserialize, PartialEq, Serialize)] +#[serde(deny_unknown_fields)] +struct PipLayerMetadata { + python_version: String, + pip_version: String, +} + +/// Errors that can occur when installing pip into a layer. 
+#[derive(Debug)] +pub(crate) enum PipLayerError { + InstallPipCommand(StreamedCommandError), + LocateBundledPip(io::Error), +} + +impl From<PipLayerError> for libcnb::Error<BuildpackError> { + fn from(error: PipLayerError) -> Self { + Self::BuildpackError(BuildpackError::PipLayer(error)) + } +} diff --git a/src/layers/pip_cache.rs b/src/layers/pip_cache.rs index fa21766..75196c7 100644 --- a/src/layers/pip_cache.rs +++ b/src/layers/pip_cache.rs @@ -34,9 +34,9 @@ pub(crate) fn prepare_pip_cache( invalid_metadata_action: &|_| InvalidMetadataAction::DeleteLayer, restored_layer_action: &|cached_metadata: &PipCacheLayerMetadata, _| { if cached_metadata == &new_metadata { - Ok(RestoredLayerAction::KeepLayer) + RestoredLayerAction::KeepLayer } else { - Ok(RestoredLayerAction::DeleteLayer) + RestoredLayerAction::DeleteLayer } }, }, diff --git a/src/layers/python.rs b/src/layers/python.rs index af0ae05..bd8438c 100644 --- a/src/layers/python.rs +++ b/src/layers/python.rs @@ -1,6 +1,5 @@ -use crate::packaging_tool_versions::PIP_VERSION; use crate::python_version::PythonVersion; -use crate::utils::{self, DownloadUnpackArchiveError, StreamedCommandError}; +use crate::utils::{self, DownloadUnpackArchiveError}; use crate::{BuildpackError, PythonBuildpack}; use libcnb::build::BuildContext; use libcnb::data::layer_name; @@ -12,34 +11,18 @@ use libcnb::Env; use libherokubuildpack::log::log_info; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; -use std::process::Command; -use std::{fs, io}; -/// Creates a layer containing the Python runtime and pip. -// -// We install both Python and the packaging tools into the same layer, since: -// - We don't want to mix buildpack/packaging dependencies with the app's own dependencies -// (for a start, we need pip installed to even install the user's own dependencies, plus -// want to keep caching separate), so cannot install the packaging tools into the user -// site-packages directory. 
-// - We don't want to install the packaging tools into an arbitrary directory added to -// `PYTHONPATH`, since directories added to `PYTHONPATH` take precedence over the Python -// stdlib (unlike the system or user site-packages directories), and so can result in hard -// to debug stdlib shadowing problems that users won't encounter locally. -// - This leaves just the system site-packages directory, which exists within the Python -// installation directory and Python does not support moving it elsewhere. -// - It matches what both local and official Docker image environments do. -pub(crate) fn install_python_and_packaging_tools( +/// Creates a layer containing the Python runtime. +pub(crate) fn install_python( context: &BuildContext<PythonBuildpack>, env: &mut Env, python_version: &PythonVersion, -) -> Result<(), libcnb::Error<BuildpackError>> { +) -> Result<PathBuf, libcnb::Error<BuildpackError>> { let new_metadata = PythonLayerMetadata { arch: context.target.arch.clone(), distro_name: context.target.distro_name.clone(), distro_version: context.target.distro_version.clone(), python_version: python_version.to_string(), - pip_version: PIP_VERSION.to_string(), }; let layer = context.cached_layer( @@ -49,11 +32,18 @@ pub(crate) fn install_python_and_packaging_tools( launch: true, invalid_metadata_action: &|_| InvalidMetadataAction::DeleteLayer, restored_layer_action: &|cached_metadata: &PythonLayerMetadata, _| { + let cached_python_version = cached_metadata.python_version.clone(); let reasons = cache_invalidation_reasons(cached_metadata, &new_metadata); if reasons.is_empty() { - Ok((RestoredLayerAction::KeepLayer, Vec::new())) + Ok(( + RestoredLayerAction::KeepLayer, + (cached_python_version, Vec::new()), + )) } else { - Ok((RestoredLayerAction::DeleteLayer, reasons)) + Ok(( + RestoredLayerAction::DeleteLayer, + (cached_python_version, reasons), + )) } }, }, @@ -61,19 +51,25 @@ pub(crate) fn install_python_and_packaging_tools( let layer_path = layer.path(); match layer.state { - LayerState::Restored { .. 
} => { - log_info(format!( - "Using cached Python {python_version} and pip {PIP_VERSION}" - )); + LayerState::Restored { + cause: (ref cached_python_version, _), + } => { + log_info(format!("Using cached Python {cached_python_version}")); } LayerState::Empty { ref cause } => { match cause { EmptyLayerCause::InvalidMetadataAction { .. } => { - log_info("Discarding cache since the buildpack cache format has changed"); + log_info("Discarding cached Python since its layer metadata can't be parsed"); } - EmptyLayerCause::RestoredLayerAction { cause: reasons } => { + EmptyLayerCause::RestoredLayerAction { + cause: (ref cached_python_version, reasons), + } => { + // TODO: Move this type of detailed change messaging to a build config summary + // at the start of the build. This message could then be simplified to: + // "Discarding cached Python X.Y.Z (ubuntu-24.04, arm64)" + // ...and the "Installing" message changed similarly. log_info(format!( - "Discarding cache since:\n - {}", + "Discarding cached Python {cached_python_version} since:\n - {}", reasons.join("\n - ") )); } @@ -103,40 +99,7 @@ pub(crate) fn install_python_and_packaging_tools( layer_env = layer.read_env()?; env.clone_from(&layer_env.apply(Scope::Build, env)); - if let LayerState::Restored { .. } = layer.state { - return Ok(()); - } - - log_info(format!("Installing pip {PIP_VERSION}")); - - let python_stdlib_dir = layer_path.join(format!( - "lib/python{}.{}", - python_version.major, python_version.minor - )); - - // Python bundles pip within its standard library, which we can use to install our chosen - // pip version from PyPI, saving us from having to download the usual pip bootstrap script. 
- let bundled_pip_module_path = - bundled_pip_module_path(&python_stdlib_dir).map_err(PythonLayerError::LocateBundledPip)?; - - utils::run_command_and_stream_output( - Command::new("python") - .args([ - &bundled_pip_module_path.to_string_lossy(), - "install", - // There is no point using pip's cache here, since the layer itself will be cached. - "--no-cache-dir", - "--no-input", - "--quiet", - format!("pip=={PIP_VERSION}").as_str(), - ]) - .current_dir(&context.app_dir) - .env_clear() - .envs(&*env), - ) - .map_err(PythonLayerError::BootstrapPipCommand)?; - - Ok(()) + Ok(layer_path) } #[derive(Clone, Deserialize, Serialize)] @@ -146,7 +109,6 @@ struct PythonLayerMetadata { distro_name: String, distro_version: String, python_version: String, - pip_version: String, } /// Compare cached layer metadata to the new layer metadata to determine if the cache should be @@ -165,7 +127,6 @@ fn cache_invalidation_reasons( distro_name: cached_distro_name, distro_version: cached_distro_version, python_version: cached_python_version, - pip_version: cached_pip_version, } = cached_metadata; let PythonLayerMetadata { @@ -173,7 +134,6 @@ fn cache_invalidation_reasons( distro_name, distro_version, python_version, - pip_version, } = new_metadata; let mut reasons = Vec::new(); @@ -196,12 +156,6 @@ fn cache_invalidation_reasons( )); } - if cached_pip_version != pip_version { - reasons.push(format!( - "The pip version has changed from {cached_pip_version} to {pip_version}" - )); - } - reasons } @@ -230,16 +184,6 @@ fn generate_layer_env(layer_path: &Path, python_version: &PythonVersion) -> Laye "LANG", "C.UTF-8", ) - // We use a curated pip version, so disable the update check to speed up pip invocations, - // reduce build log spam and prevent users from thinking they need to manually upgrade. - // This uses an env var (rather than the `--disable-pip-version-check` arg) so that it also - // takes effect for any pip invocations in later buildpacks or when debugging at run-time. 
- .chainable_insert( - Scope::All, - ModificationBehavior::Override, - "PIP_DISABLE_PIP_VERSION_CHECK", - "1", - ) // We have to set `PKG_CONFIG_PATH` explicitly, since the automatic path set by lifecycle/libcnb // is `/pkgconfig/`, whereas Python's pkgconfig files are at `/lib/pkgconfig/`. .chainable_insert( @@ -277,20 +221,17 @@ fn generate_layer_env(layer_path: &Path, python_version: &PythonVersion) -> Laye ) // By default, Python's cached bytecode files (`.pyc` files) embed the last-modified time of // their `.py` source file, so Python can determine when they need regenerating. This causes - // `.pyc` files (and thus layer SHA256) to be non-deterministic in cases where the source - // file's last-modified time can vary (such as for packages installed by pip). In addition, - // when lifecycle exports layers it resets the timestamps on all files to a fixed value: - // https://buildpacks.io/docs/features/reproducibility/#consequences-and-caveats + // them (and the layer digest) to be non-deterministic in cases where the source file's + // last-modified time can vary (such as for installed packages). In addition, when lifecycle + // exports layers it resets the timestamps on all files to a fixed value: + // https://buildpacks.io/docs/for-app-developers/concepts/reproducibility/#consequences-and-caveats // // At run-time, this means the `.pyc`'s embedded timestamps no longer match the timestamps // of the original `.py` files, causing Python to regenerate the bytecode, and so losing any // benefit of having kept the `.pyc` files (at the cost of a larger app image). // - // We could delete the `.pyc` files at the end of this buildpack's build phase, however: - // - This means they need to be regenerated at app boot, slowing boot times. - // (For a simple Django project on a Perf-M, boot time increases from ~0.5s to ~1.5s.) 
- // - If a later buildpack runs any of the Python files added by this buildpack, then the - // timestamp based `.pyc` files will be created again, re-introducing non-determinism. + // We could delete the `.pyc` files at the end of this buildpack's build phase, or suppress + // their creation using `PYTHONDONTWRITEBYTECODE=1`, but this would mean slower app boot. // // Instead, we use the hash-based cache files mode added in Python 3.7+, which embeds a hash // of the original `.py` file in the `.pyc` file instead of the timestamp: @@ -303,15 +244,6 @@ fn generate_layer_env(layer_path: &Path, python_version: &PythonVersion) -> Laye // // Note: Both the CLI args and the env var only apply to usages of `compileall` or `py_compile`, // and not `.pyc` generation as part of Python importing a file during normal operation. - // - // We use the env var, since: - // - pip calls `compileall` itself after installing packages, and doesn't allow us to - // customise the options passed to it, which would mean we'd have to pass `--no-compile` - // to pip followed by running `compileall` manually ourselves, meaning more complexity - // every time we (or a later buildpack) use `pip install`. - // - When we add support for Poetry, we'll have to use an env var regardless, since Poetry - // doesn't allow customising the options passed to its internal pip invocations, so we'd - // have no way of passing `--no-compile` to pip. .chainable_insert( Scope::Build, ModificationBehavior::Default, @@ -321,42 +253,15 @@ fn generate_layer_env(layer_path: &Path, python_version: &PythonVersion) -> Laye // the pip install. As such, we cannot use a zero value since the ZIP file format doesn't // support dates before 1980. 
Instead, we use a value equivalent to `1980-01-01T00:00:01Z`, // for parity with that used by lifecycle: - // https://github.com/buildpacks/lifecycle/blob/v0.15.3/archive/writer.go#L12 + // https://github.com/buildpacks/lifecycle/blob/v0.20.1/archive/writer.go#L12 "315532801", ) } -/// The path to the pip module bundled in Python's standard library. -fn bundled_pip_module_path(python_stdlib_dir: &Path) -> io::Result<PathBuf> { - let bundled_wheels_dir = python_stdlib_dir.join("ensurepip/_bundled"); - - // The wheel filename includes the pip version (for example `pip-XX.Y-py3-none-any.whl`), - // which varies from one Python release to the next (including between patch releases). - // As such, we have to find the wheel based on the known filename prefix of `pip-`. - for entry in fs::read_dir(bundled_wheels_dir)? { - let entry = entry?; - if entry.file_name().to_string_lossy().starts_with("pip-") { - let pip_wheel_path = entry.path(); - // The pip module exists inside the pip wheel (which is a zip file), however, - // Python can load it directly by appending the module name to the zip filename, - // as though it were a path. For example: `pip-XX.Y-py3-none-any.whl/pip` - let pip_module_path = pip_wheel_path.join("pip"); - return Ok(pip_module_path); - } - } - - Err(io::Error::new( - io::ErrorKind::NotFound, - "No files found matching the pip wheel filename prefix", - )) -} - -/// Errors that can occur when installing Python and required packaging tools into a layer. +/// Errors that can occur when installing Python into a layer. 
#[derive(Debug)] pub(crate) enum PythonLayerError { - BootstrapPipCommand(StreamedCommandError), DownloadUnpackPythonArchive(DownloadUnpackArchiveError), - LocateBundledPip(io::Error), PythonArchiveNotFound { python_version: PythonVersion }, } @@ -376,7 +281,6 @@ mod tests { distro_name: "ubuntu".to_string(), distro_version: "22.04".to_string(), python_version: "3.11.0".to_string(), - pip_version: "A.B.C".to_string(), } } @@ -411,7 +315,6 @@ mod tests { distro_name: "debian".to_string(), distro_version: "12".to_string(), python_version: "3.11.1".to_string(), - pip_version: "A.B.C-new".to_string(), }; assert_eq!( cache_invalidation_reasons(&cached_metadata, &new_metadata), @@ -419,7 +322,6 @@ mod tests { "The CPU architecture has changed from amd64 to arm64", "The OS has changed from ubuntu-22.04 to debian-12", "The Python version has changed from 3.11.0 to 3.11.1", - "The pip version has changed from A.B.C to A.B.C-new", ] ); } @@ -429,7 +331,6 @@ mod tests { let mut base_env = Env::new(); base_env.insert("CPATH", "/base"); base_env.insert("LANG", "this-should-be-overridden"); - base_env.insert("PIP_DISABLE_PIP_VERSION_CHECK", "this-should-be-overridden"); base_env.insert("PKG_CONFIG_PATH", "/base"); base_env.insert("PYTHONHOME", "this-should-be-overridden"); base_env.insert("PYTHONUNBUFFERED", "this-should-be-overridden"); @@ -448,7 +349,6 @@ mod tests { [ ("CPATH", "/layer-dir/include/python3.11:/base"), ("LANG", "C.UTF-8"), - ("PIP_DISABLE_PIP_VERSION_CHECK", "1"), ("PKG_CONFIG_PATH", "/layer-dir/lib/pkgconfig:/base"), ("PYTHONHOME", "/layer-dir"), ("PYTHONUNBUFFERED", "1"), @@ -460,7 +360,6 @@ mod tests { [ ("CPATH", "/base"), ("LANG", "C.UTF-8"), - ("PIP_DISABLE_PIP_VERSION_CHECK", "1"), ("PKG_CONFIG_PATH", "/base"), ("PYTHONHOME", "/layer-dir"), ("PYTHONUNBUFFERED", "1"), diff --git a/src/main.rs b/src/main.rs index d35bbb2..fa533d0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,9 +9,10 @@ mod runtime_txt; mod utils; use 
crate::django::DjangoCollectstaticError; +use crate::layers::pip::PipLayerError; use crate::layers::pip_dependencies::PipDependenciesLayerError; -use crate::layers::python::{self, PythonLayerError}; -use crate::layers::{pip_cache, pip_dependencies}; +use crate::layers::python::PythonLayerError; +use crate::layers::{pip, pip_cache, pip_dependencies, python}; use crate::package_manager::{DeterminePackageManagerError, PackageManager}; use crate::python_version::PythonVersionError; use libcnb::build::{BuildContext, BuildResult, BuildResultBuilder}; @@ -60,14 +61,14 @@ impl Buildpack for PythonBuildpack { // making sure that buildpack env vars take precedence in layers envs and command usage. let mut env = Env::from_current(); - // Create the layer containing the Python runtime and pip. - log_header("Installing Python and pip"); - python::install_python_and_packaging_tools(&context, &mut env, &python_version)?; + log_header("Installing Python"); + let python_layer_path = python::install_python(&context, &mut env, &python_version)?; - // Create the layers for the application dependencies and package manager cache. // In the future support will be added for package managers other than pip. let dependencies_layer_dir = match package_manager { PackageManager::Pip => { + log_header("Installing pip"); + pip::install_pip(&context, &mut env, &python_version, &python_layer_path)?; log_header("Installing dependencies using pip"); pip_cache::prepare_pip_cache(&context, &mut env, &python_version)?; pip_dependencies::install_dependencies(&context, &mut env)? @@ -102,7 +103,9 @@ pub(crate) enum BuildpackError { DjangoDetection(io::Error), /// Errors installing the project's dependencies into a layer using pip. PipDependenciesLayer(PipDependenciesLayerError), - /// Errors installing Python and required packaging tools into a layer. + /// Errors installing pip into a layer. + PipLayer(PipLayerError), + /// Errors installing Python into a layer. 
PythonLayer(PythonLayerError), /// Errors determining which Python version to use for a project. PythonVersion(PythonVersionError), diff --git a/src/utils.rs b/src/utils.rs index 5206107..f4cd724 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,4 +1,5 @@ -use std::path::Path; +use crate::python_version::PythonVersion; +use std::path::{Path, PathBuf}; use std::process::{Command, ExitStatus, Output}; use std::{fs, io}; use tar::Archive; @@ -39,6 +40,37 @@ pub(crate) enum DownloadUnpackArchiveError { Unpack(io::Error), } +/// Determine the path to the pip module bundled in Python's standard library. +pub(crate) fn bundled_pip_module_path( + python_layer_path: &Path, + python_version: &PythonVersion, +) -> io::Result<PathBuf> { + let bundled_wheels_dir = python_layer_path.join(format!( + "lib/python{}.{}/ensurepip/_bundled", + python_version.major, python_version.minor + )); + + // The wheel filename includes the pip version (for example `pip-XX.Y-py3-none-any.whl`), + // which varies from one Python release to the next (including between patch releases). + // As such, we have to find the wheel based on the known filename prefix of `pip-`. + for entry in fs::read_dir(bundled_wheels_dir)? { + let entry = entry?; + if entry.file_name().to_string_lossy().starts_with("pip-") { + let pip_wheel_path = entry.path(); + // The pip module exists inside the pip wheel (which is a zip file), however, + // Python can load it directly by appending the module name to the zip filename, + // as though it were a path. For example: `pip-XX.Y-py3-none-any.whl/pip` + let pip_module_path = pip_wheel_path.join("pip"); + return Ok(pip_module_path); + } + } + + Err(io::Error::new( + io::ErrorKind::NotFound, + "No files found matching the pip wheel filename prefix", + )) +} + /// A helper for running an external process using [`Command`], that streams stdout/stderr /// to the user and checks that the exit status of the process was non-zero.
pub(crate) fn run_command_and_stream_output( diff --git a/tests/fixtures/testing_buildpack/bin/build b/tests/fixtures/testing_buildpack/bin/build index ac86b76..37283f2 100755 --- a/tests/fixtures/testing_buildpack/bin/build +++ b/tests/fixtures/testing_buildpack/bin/build @@ -3,7 +3,7 @@ # Check that: # - The correct env vars are set for later buildpacks. # - Python's sys.path is correct. -# - The correct version of pip was installed. +# - The correct version of pip was installed, into its own layer. # - Both the package manager and Python can find the typing-extensions package. # - The typing-extensions package was installed into a separate dependencies layer. diff --git a/tests/pip_test.rs b/tests/pip_test.rs index d4518d5..dbc4079 100644 --- a/tests/pip_test.rs +++ b/tests/pip_test.rs @@ -22,8 +22,10 @@ fn pip_basic_install_and_cache_reuse() { No Python version specified, using the current default of Python {DEFAULT_PYTHON_VERSION}. To use a different version, see: https://devcenter.heroku.com/articles/python-runtimes - [Installing Python and pip] + [Installing Python] Installing Python {DEFAULT_PYTHON_VERSION} + + [Installing pip] Installing pip {PIP_VERSION} [Installing dependencies using pip] @@ -38,15 +40,16 @@ fn pip_basic_install_and_cache_reuse() { ## Testing buildpack ## CPATH=/layers/heroku_python/venv/include:/layers/heroku_python/python/include/python3.12:/layers/heroku_python/python/include LANG=C.UTF-8 - LD_LIBRARY_PATH=/layers/heroku_python/venv/lib:/layers/heroku_python/python/lib - LIBRARY_PATH=/layers/heroku_python/venv/lib:/layers/heroku_python/python/lib - PATH=/layers/heroku_python/venv/bin:/layers/heroku_python/python/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + LD_LIBRARY_PATH=/layers/heroku_python/venv/lib:/layers/heroku_python/python/lib:/layers/heroku_python/pip/lib + LIBRARY_PATH=/layers/heroku_python/venv/lib:/layers/heroku_python/python/lib:/layers/heroku_python/pip/lib + 
PATH=/layers/heroku_python/venv/bin:/layers/heroku_python/python/bin:/layers/heroku_python/pip/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin PIP_CACHE_DIR=/layers/heroku_python/pip-cache PIP_DISABLE_PIP_VERSION_CHECK=1 PIP_PYTHON=/layers/heroku_python/venv PKG_CONFIG_PATH=/layers/heroku_python/python/lib/pkgconfig PYTHONHOME=/layers/heroku_python/python PYTHONUNBUFFERED=1 + PYTHONUSERBASE=/layers/heroku_python/pip SOURCE_DATE_EPOCH=315532801 VIRTUAL_ENV=/layers/heroku_python/venv @@ -56,7 +59,7 @@ fn pip_basic_install_and_cache_reuse() { '/layers/heroku_python/python/lib/python3.12/lib-dynload', '/layers/heroku_python/venv/lib/python3.12/site-packages'] - pip {PIP_VERSION} from /layers/heroku_python/python/lib/python3.12/site-packages/pip (python 3.12) + pip {PIP_VERSION} from /layers/heroku_python/pip/lib/python3.12/site-packages/pip (python 3.12) Package Version ----------------- ------- typing_extensions 4.12.2 @@ -82,12 +85,13 @@ fn pip_basic_install_and_cache_reuse() { command_output.stdout, formatdoc! {" LANG=C.UTF-8 - LD_LIBRARY_PATH=/layers/heroku_python/venv/lib:/layers/heroku_python/python/lib - PATH=/layers/heroku_python/venv/bin:/layers/heroku_python/python/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + LD_LIBRARY_PATH=/layers/heroku_python/venv/lib:/layers/heroku_python/python/lib:/layers/heroku_python/pip/lib + PATH=/layers/heroku_python/venv/bin:/layers/heroku_python/python/bin:/layers/heroku_python/pip/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin PIP_DISABLE_PIP_VERSION_CHECK=1 PIP_PYTHON=/layers/heroku_python/venv PYTHONHOME=/layers/heroku_python/python PYTHONUNBUFFERED=1 + PYTHONUSERBASE=/layers/heroku_python/pip VIRTUAL_ENV=/layers/heroku_python/venv Package Version @@ -105,8 +109,11 @@ fn pip_basic_install_and_cache_reuse() { No Python version specified, using the current default of Python {DEFAULT_PYTHON_VERSION}. 
To use a different version, see: https://devcenter.heroku.com/articles/python-runtimes - [Installing Python and pip] - Using cached Python {DEFAULT_PYTHON_VERSION} and pip {PIP_VERSION} + [Installing Python] + Using cached Python {DEFAULT_PYTHON_VERSION} + + [Installing pip] + Using cached pip {PIP_VERSION} [Installing dependencies using pip] Using cached pip download/wheel cache @@ -139,10 +146,13 @@ fn pip_cache_invalidation_python_version_changed() { No Python version specified, using the current default of Python {DEFAULT_PYTHON_VERSION}. To use a different version, see: https://devcenter.heroku.com/articles/python-runtimes - [Installing Python and pip] - Discarding cache since: + [Installing Python] + Discarding cached Python {LATEST_PYTHON_3_11} since: - The Python version has changed from {LATEST_PYTHON_3_11} to {DEFAULT_PYTHON_VERSION} Installing Python {DEFAULT_PYTHON_VERSION} + + [Installing pip] + Discarding cached pip {PIP_VERSION} Installing pip {PIP_VERSION} [Installing dependencies using pip] @@ -181,11 +191,11 @@ fn pip_cache_previous_buildpack_version() { No Python version specified, using the current default of Python {DEFAULT_PYTHON_VERSION}. 
To use a different version, see: https://devcenter.heroku.com/articles/python-runtimes - [Installing Python and pip] - Discarding cache since: - - The Python version has changed from 3.12.4 to {DEFAULT_PYTHON_VERSION} - - The pip version has changed from 24.1.2 to {PIP_VERSION} + [Installing Python] + Discarding cached Python since its layer metadata can't be parsed Installing Python {DEFAULT_PYTHON_VERSION} + + [Installing pip] Installing pip {PIP_VERSION} [Installing dependencies using pip] diff --git a/tests/python_version_test.rs b/tests/python_version_test.rs index 52e1cfa..f96f869 100644 --- a/tests/python_version_test.rs +++ b/tests/python_version_test.rs @@ -1,4 +1,3 @@ -use crate::packaging_tool_versions::PIP_VERSION; use crate::tests::{ builder, default_build_config, DEFAULT_PYTHON_VERSION, LATEST_PYTHON_3_10, LATEST_PYTHON_3_11, LATEST_PYTHON_3_12, LATEST_PYTHON_3_7, LATEST_PYTHON_3_8, LATEST_PYTHON_3_9, @@ -20,7 +19,7 @@ fn python_version_unspecified() { No Python version specified, using the current default of Python {DEFAULT_PYTHON_VERSION}. To use a different version, see: https://devcenter.heroku.com/articles/python-runtimes - [Installing Python and pip] + [Installing Python] Installing Python {DEFAULT_PYTHON_VERSION} "} ); @@ -86,9 +85,8 @@ fn builds_with_python_version(fixture_path: &str, python_version: &str) { [Determining Python version] Using Python version {python_version} specified in runtime.txt - [Installing Python and pip] + [Installing Python] Installing Python {python_version} - Installing pip {PIP_VERSION} "} ); // There's no sensible default process type we can set for Python apps. 
@@ -193,7 +191,7 @@ fn runtime_txt_non_existent_version() { fn rejects_non_existent_python_version(fixture_path: &str, python_version: &str) { TestRunner::default().build( - default_build_config( fixture_path).expected_pack_result(PackResult::Failure), + default_build_config(fixture_path).expected_pack_result(PackResult::Failure), |context| { assert_contains!( context.pack_stderr,