From 4f2349119cf341eedf738d06a50ed136a5f207db Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 9 Sep 2024 16:19:15 -0400 Subject: [PATCH] Add support for dynamic cache keys (#7136) ## Summary This PR adds a more flexible cache invalidation abstraction for uv, and uses that new abstraction to improve support for dynamic metadata. Specifically, instead of relying solely on a timestamp, we now pass around a `CacheInfo` struct which (as of now) contains `Option<Timestamp>` and `Option<CacheCommit>`. The `CacheInfo` is saved in `dist-info` as `uv_cache.json`, so we can test already-installed distributions for cache validity (along with testing _cached_ distributions for cache validity). Beyond the defaults (`pyproject.toml`, `setup.py`, and `setup.cfg` changes), users can also specify additional cache keys, and it's easy for us to extend support in the future. Right now, cache keys can either be instructions to include the current commit (for `setuptools_scm` and similar) or file paths (for `hatch-requirements-txt` and similar): ```toml [tool.uv] cache-keys = [{ file = "requirements.txt" }, { git = true }] ``` This change should be fully backwards compatible. Closes https://github.com/astral-sh/uv/issues/6964. Closes https://github.com/astral-sh/uv/issues/6255. Closes https://github.com/astral-sh/uv/issues/6860. 
--- Cargo.lock | 22 ++ Cargo.toml | 1 + crates/distribution-types/Cargo.toml | 1 + crates/distribution-types/src/cached.rs | 22 ++ crates/distribution-types/src/installed.rs | 25 +- crates/install-wheel-rs/Cargo.toml | 3 +- crates/install-wheel-rs/Readme.md | 15 -- crates/install-wheel-rs/src/linker.rs | 16 +- crates/install-wheel-rs/src/wheel.rs | 24 +- crates/uv-cache-info/Cargo.toml | 21 ++ crates/uv-cache-info/src/cache_info.rs | 174 +++++++++++++ crates/uv-cache-info/src/commit_info.rs | 91 +++++++ crates/uv-cache-info/src/lib.rs | 6 + .../src/timestamp.rs | 2 +- crates/uv-cache/Cargo.toml | 1 + crates/uv-cache/src/by_timestamp.rs | 3 +- crates/uv-cache/src/lib.rs | 3 +- crates/uv-configuration/Cargo.toml | 2 + .../uv-configuration/src/config_settings.rs | 23 ++ .../uv-configuration/src/package_options.rs | 3 +- crates/uv-dispatch/src/lib.rs | 4 + crates/uv-distribution/Cargo.toml | 1 + .../src/distribution_database.rs | 32 ++- crates/uv-distribution/src/download.rs | 14 +- .../src/index/built_wheel_index.rs | 49 ++-- .../uv-distribution/src/index/cached_wheel.rs | 27 +- .../src/source/built_wheel_metadata.rs | 5 +- crates/uv-distribution/src/source/mod.rs | 154 ++++++------ crates/uv-git/Cargo.toml | 2 +- crates/uv-installer/Cargo.toml | 1 + crates/uv-installer/src/installer.rs | 5 + crates/uv-installer/src/plan.rs | 11 +- crates/uv-installer/src/satisfies.rs | 41 ++-- crates/uv-python/Cargo.toml | 1 + crates/uv-python/src/interpreter.rs | 3 +- crates/uv-settings/Cargo.toml | 1 + crates/uv-settings/src/settings.rs | 33 +++ crates/uv-types/src/traits.rs | 5 +- crates/uv/Cargo.toml | 1 + crates/uv/src/commands/tool/install.rs | 3 +- crates/uv/src/commands/tool/run.rs | 3 +- crates/uv/src/lib.rs | 6 +- crates/uv/tests/pip_install.rs | 232 +++++++++++++++++- crates/uv/tests/pip_sync.rs | 2 +- docs/concepts/cache.md | 29 ++- docs/reference/settings.md | 46 ++++ uv.schema.json | 45 ++++ 47 files changed, 1022 insertions(+), 192 deletions(-) delete mode 100644 
crates/install-wheel-rs/Readme.md create mode 100644 crates/uv-cache-info/Cargo.toml create mode 100644 crates/uv-cache-info/src/cache_info.rs create mode 100644 crates/uv-cache-info/src/commit_info.rs create mode 100644 crates/uv-cache-info/src/lib.rs rename crates/{uv-cache => uv-cache-info}/src/timestamp.rs (94%) diff --git a/Cargo.lock b/Cargo.lock index c587c6b0b499..993f668d18c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1048,6 +1048,7 @@ dependencies = [ "tracing", "url", "urlencoding", + "uv-cache-info", "uv-fs", "uv-git", "uv-normalize", @@ -1780,6 +1781,7 @@ dependencies = [ "tempfile", "thiserror", "tracing", + "uv-cache-info", "uv-fs", "uv-normalize", "uv-warnings", @@ -4498,6 +4500,7 @@ dependencies = [ "url", "uv-auth", "uv-cache", + "uv-cache-info", "uv-cli", "uv-client", "uv-configuration", @@ -4595,11 +4598,24 @@ dependencies = [ "tempfile", "tracing", "url", + "uv-cache-info", "uv-fs", "uv-normalize", "walkdir", ] +[[package]] +name = "uv-cache-info" +version = "0.0.1" +dependencies = [ + "fs-err", + "schemars", + "serde", + "thiserror", + "toml", + "tracing", +] + [[package]] name = "uv-cli" version = "0.0.1" @@ -4681,6 +4697,7 @@ name = "uv-configuration" version = "0.0.1" dependencies = [ "anyhow", + "cache-key", "clap", "distribution-types", "either", @@ -4696,6 +4713,7 @@ dependencies = [ "url", "uv-auth", "uv-cache", + "uv-cache-info", "uv-normalize", ] @@ -4793,6 +4811,7 @@ dependencies = [ "tracing", "url", "uv-cache", + "uv-cache-info", "uv-client", "uv-configuration", "uv-extract", @@ -4896,6 +4915,7 @@ dependencies = [ "tracing", "url", "uv-cache", + "uv-cache-info", "uv-configuration", "uv-distribution", "uv-extract", @@ -4984,6 +5004,7 @@ dependencies = [ "tracing", "url", "uv-cache", + "uv-cache-info", "uv-client", "uv-extract", "uv-fs", @@ -5116,6 +5137,7 @@ dependencies = [ "thiserror", "toml", "tracing", + "uv-cache-info", "uv-configuration", "uv-fs", "uv-macros", diff --git a/Cargo.toml b/Cargo.toml index 
b496319b3baa..6724250d342f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ requirements-txt = { path = "crates/requirements-txt" } uv-auth = { path = "crates/uv-auth" } uv-build = { path = "crates/uv-build" } uv-cache = { path = "crates/uv-cache" } +uv-cache-info = { path = "crates/uv-cache-info" } uv-cli = { path = "crates/uv-cli" } uv-client = { path = "crates/uv-client" } uv-configuration = { path = "crates/uv-configuration" } diff --git a/crates/distribution-types/Cargo.toml b/crates/distribution-types/Cargo.toml index f64a31d915f0..714b14d51ab7 100644 --- a/crates/distribution-types/Cargo.toml +++ b/crates/distribution-types/Cargo.toml @@ -19,6 +19,7 @@ pep440_rs = { workspace = true } pep508_rs = { workspace = true, features = ["serde"] } platform-tags = { workspace = true } pypi-types = { workspace = true } +uv-cache-info = { workspace = true } uv-fs = { workspace = true } uv-git = { workspace = true } uv-normalize = { workspace = true } diff --git a/crates/distribution-types/src/cached.rs b/crates/distribution-types/src/cached.rs index eaa95116961a..eb00a7598f4e 100644 --- a/crates/distribution-types/src/cached.rs +++ b/crates/distribution-types/src/cached.rs @@ -5,6 +5,7 @@ use anyhow::{anyhow, Result}; use distribution_filename::WheelFilename; use pep508_rs::VerbatimUrl; use pypi_types::{HashDigest, ParsedDirectoryUrl}; +use uv_cache_info::CacheInfo; use uv_normalize::PackageName; use crate::{ @@ -26,6 +27,7 @@ pub struct CachedRegistryDist { pub filename: WheelFilename, pub path: PathBuf, pub hashes: Vec, + pub cache_info: CacheInfo, } #[derive(Debug, Clone, Hash, PartialEq, Eq)] @@ -36,6 +38,7 @@ pub struct CachedDirectUrlDist { pub editable: bool, pub r#virtual: bool, pub hashes: Vec, + pub cache_info: CacheInfo, } impl CachedDist { @@ -44,6 +47,7 @@ impl CachedDist { remote: Dist, filename: WheelFilename, hashes: Vec, + cache_info: CacheInfo, path: PathBuf, ) -> Self { match remote { @@ -51,11 +55,13 @@ impl CachedDist { filename, path, 
hashes, + cache_info, }), Dist::Built(BuiltDist::DirectUrl(dist)) => Self::Url(CachedDirectUrlDist { filename, url: dist.url, hashes, + cache_info, path, editable: false, r#virtual: false, @@ -64,6 +70,7 @@ impl CachedDist { filename, url: dist.url, hashes, + cache_info, path, editable: false, r#virtual: false, @@ -72,11 +79,13 @@ impl CachedDist { filename, path, hashes, + cache_info, }), Dist::Source(SourceDist::DirectUrl(dist)) => Self::Url(CachedDirectUrlDist { filename, url: dist.url, hashes, + cache_info, path, editable: false, r#virtual: false, @@ -85,6 +94,7 @@ impl CachedDist { filename, url: dist.url, hashes, + cache_info, path, editable: false, r#virtual: false, @@ -93,6 +103,7 @@ impl CachedDist { filename, url: dist.url, hashes, + cache_info, path, editable: false, r#virtual: false, @@ -101,6 +112,7 @@ impl CachedDist { filename, url: dist.url, hashes, + cache_info, path, editable: dist.editable, r#virtual: dist.r#virtual, @@ -116,6 +128,14 @@ impl CachedDist { } } + /// Return the [`CacheInfo`] of the distribution. + pub fn cache_info(&self) -> &CacheInfo { + match self { + Self::Registry(dist) => &dist.cache_info, + Self::Url(dist) => &dist.cache_info, + } + } + /// Return the [`ParsedUrl`] of the distribution, if it exists. 
pub fn parsed_url(&self) -> Result> { match self { @@ -161,12 +181,14 @@ impl CachedDirectUrlDist { filename: WheelFilename, url: VerbatimUrl, hashes: Vec, + cache_info: CacheInfo, path: PathBuf, ) -> Self { Self { filename, url, hashes, + cache_info, path, editable: false, r#virtual: false, diff --git a/crates/distribution-types/src/installed.rs b/crates/distribution-types/src/installed.rs index 151592a3dc7a..2cdac4802081 100644 --- a/crates/distribution-types/src/installed.rs +++ b/crates/distribution-types/src/installed.rs @@ -10,6 +10,7 @@ use url::Url; use distribution_filename::EggInfoFilename; use pep440_rs::Version; use pypi_types::DirectUrl; +use uv_cache_info::CacheInfo; use uv_fs::Simplified; use uv_normalize::PackageName; @@ -35,6 +36,7 @@ pub struct InstalledRegistryDist { pub name: PackageName, pub version: Version, pub path: PathBuf, + pub cache_info: Option, } #[derive(Debug, Clone, Hash, PartialEq, Eq)] @@ -45,6 +47,7 @@ pub struct InstalledDirectUrlDist { pub url: Url, pub editable: bool, pub path: PathBuf, + pub cache_info: Option, } #[derive(Debug, Clone, Hash, PartialEq, Eq)] @@ -90,6 +93,7 @@ impl InstalledDist { let name = PackageName::from_str(name)?; let version = Version::from_str(version).map_err(|err| anyhow!(err))?; + let cache_info = Self::cache_info(path)?; return if let Some(direct_url) = Self::direct_url(path)? { match Url::try_from(&direct_url) { @@ -100,6 +104,7 @@ impl InstalledDist { direct_url: Box::new(direct_url), url, path: path.to_path_buf(), + cache_info, }))), Err(err) => { warn!("Failed to parse direct URL: {err}"); @@ -107,6 +112,7 @@ impl InstalledDist { name, version, path: path.to_path_buf(), + cache_info, }))) } } @@ -115,6 +121,7 @@ impl InstalledDist { name, version, path: path.to_path_buf(), + cache_info, }))) }; } @@ -256,13 +263,27 @@ impl InstalledDist { /// Read the `direct_url.json` file from a `.dist-info` directory. 
pub fn direct_url(path: &Path) -> Result> { let path = path.join("direct_url.json"); - let Ok(file) = fs_err::File::open(path) else { - return Ok(None); + let file = match fs_err::File::open(&path) { + Ok(file) => file, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(err.into()), }; let direct_url = serde_json::from_reader::(file)?; Ok(Some(direct_url)) } + /// Read the `uv_cache.json` file from a `.dist-info` directory. + pub fn cache_info(path: &Path) -> Result> { + let path = path.join("uv_cache.json"); + let file = match fs_err::File::open(&path) { + Ok(file) => file, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(err.into()), + }; + let cache_info = serde_json::from_reader::(file)?; + Ok(Some(cache_info)) + } + /// Read the `METADATA` file from a `.dist-info` directory. pub fn metadata(&self) -> Result { match self { diff --git a/crates/install-wheel-rs/Cargo.toml b/crates/install-wheel-rs/Cargo.toml index 7ca22b8ed5c6..43feb4359055 100644 --- a/crates/install-wheel-rs/Cargo.toml +++ b/crates/install-wheel-rs/Cargo.toml @@ -2,7 +2,7 @@ name = "install-wheel-rs" version = "0.0.1" publish = false -description = "Takes a wheel and installs it, either in a venv or for monotrail" +description = "Takes a wheel and installs it." keywords = ["wheel", "python"] edition = { workspace = true } @@ -24,6 +24,7 @@ distribution-filename = { workspace = true } pep440_rs = { workspace = true } platform-tags = { workspace = true } pypi-types = { workspace = true } +uv-cache-info = { workspace = true } uv-fs = { workspace = true } uv-normalize = { workspace = true } uv-warnings = { workspace = true } diff --git a/crates/install-wheel-rs/Readme.md b/crates/install-wheel-rs/Readme.md deleted file mode 100644 index 178bb861d78a..000000000000 --- a/crates/install-wheel-rs/Readme.md +++ /dev/null @@ -1,15 +0,0 @@ -Reimplementation of wheel installing in rust. 
Supports both classical venvs and monotrail. - -There are simple python bindings: - -```python -from install_wheel_rs import LockedVenv - -locked_venv = LockedVenv("path/to/.venv") -locked_venv.install_wheel("path/to/some_tagged_wheel.whl") -``` - -and there's only one function: `install_wheels_venv(wheels: List[str], venv: str)`, where `wheels` -is a list of paths to wheel files and `venv` is the location of the venv to install the packages in. - -See monotrail for benchmarks. diff --git a/crates/install-wheel-rs/src/linker.rs b/crates/install-wheel-rs/src/linker.rs index 4fb76e1cd912..9226474f60a4 100644 --- a/crates/install-wheel-rs/src/linker.rs +++ b/crates/install-wheel-rs/src/linker.rs @@ -5,6 +5,12 @@ use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; use std::time::SystemTime; +use crate::script::{scripts_from_ini, Script}; +use crate::wheel::{ + extra_dist_info, install_data, parse_wheel_file, read_record_file, write_script_entrypoints, + LibKind, +}; +use crate::{Error, Layout}; use distribution_filename::WheelFilename; use fs_err as fs; use fs_err::{DirEntry, File}; @@ -14,16 +20,10 @@ use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; use tempfile::tempdir_in; use tracing::{debug, instrument}; +use uv_cache_info::CacheInfo; use uv_warnings::warn_user_once; use walkdir::WalkDir; -use crate::script::{scripts_from_ini, Script}; -use crate::wheel::{ - extra_dist_info, install_data, parse_wheel_file, read_record_file, write_script_entrypoints, - LibKind, -}; -use crate::{Error, Layout}; - #[derive(Debug, Default)] pub struct Locks(Mutex>>>); @@ -41,6 +41,7 @@ pub fn install_wheel( wheel: impl AsRef, filename: &WheelFilename, direct_url: Option<&DirectUrl>, + cache_info: Option<&CacheInfo>, installer: Option<&str>, link_mode: LinkMode, locks: &Locks, @@ -145,6 +146,7 @@ pub fn install_wheel( &dist_info_prefix, true, direct_url, + cache_info, installer, &mut record, )?; diff --git a/crates/install-wheel-rs/src/wheel.rs 
b/crates/install-wheel-rs/src/wheel.rs index f2f46d118145..13e73a126a98 100644 --- a/crates/install-wheel-rs/src/wheel.rs +++ b/crates/install-wheel-rs/src/wheel.rs @@ -3,25 +3,24 @@ use std::io::{BufReader, Cursor, Read, Seek, Write}; use std::path::{Path, PathBuf}; use std::{env, io}; +use crate::record::RecordEntry; +use crate::script::Script; +use crate::{Error, Layout}; use data_encoding::BASE64URL_NOPAD; use fs_err as fs; use fs_err::{DirEntry, File}; use mailparse::parse_headers; +use pypi_types::DirectUrl; use rustc_hash::FxHashMap; use sha2::{Digest, Sha256}; use tracing::{instrument, warn}; +use uv_cache_info::CacheInfo; +use uv_fs::{relative_to, Simplified}; +use uv_normalize::PackageName; use walkdir::WalkDir; use zip::write::FileOptions; use zip::ZipWriter; -use pypi_types::DirectUrl; -use uv_fs::{relative_to, Simplified}; -use uv_normalize::PackageName; - -use crate::record::RecordEntry; -use crate::script::Script; -use crate::{Error, Layout}; - const LAUNCHER_MAGIC_NUMBER: [u8; 4] = [b'U', b'V', b'U', b'V']; #[cfg(all(windows, target_arch = "x86"))] @@ -728,6 +727,7 @@ pub(crate) fn extra_dist_info( dist_info_prefix: &str, requested: bool, direct_url: Option<&DirectUrl>, + cache_info: Option<&CacheInfo>, installer: Option<&str>, record: &mut Vec, ) -> Result<(), Error> { @@ -743,6 +743,14 @@ pub(crate) fn extra_dist_info( record, )?; } + if let Some(cache_info) = cache_info { + write_file_recorded( + site_packages, + &dist_info_dir.join("uv_cache.json"), + serde_json::to_string(cache_info)?.as_bytes(), + record, + )?; + } if let Some(installer) = installer { write_file_recorded( site_packages, diff --git a/crates/uv-cache-info/Cargo.toml b/crates/uv-cache-info/Cargo.toml new file mode 100644 index 000000000000..e54d1f7af608 --- /dev/null +++ b/crates/uv-cache-info/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "uv-cache-info" +version = "0.0.1" +edition = { workspace = true } +rust-version = { workspace = true } +homepage = { workspace = true } 
+documentation = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +license = { workspace = true } + +[lints] +workspace = true + +[dependencies] +fs-err = { workspace = true } +schemars = { workspace = true, optional = true } +serde = { workspace = true, features = ["derive"] } +thiserror = { workspace = true } +toml = { workspace = true } +tracing = { workspace = true } diff --git a/crates/uv-cache-info/src/cache_info.rs b/crates/uv-cache-info/src/cache_info.rs new file mode 100644 index 000000000000..5f45aa3eb51c --- /dev/null +++ b/crates/uv-cache-info/src/cache_info.rs @@ -0,0 +1,174 @@ +use crate::commit_info::CacheCommit; +use crate::timestamp::Timestamp; + +use serde::Deserialize; +use std::cmp::max; +use std::io; +use std::path::{Path, PathBuf}; +use tracing::debug; + +/// The information used to determine whether a built distribution is up-to-date, based on the +/// timestamps of relevant files, the current commit of a repository, etc. +#[derive(Default, Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "kebab-case")] +#[serde(try_from = "CacheInfoWire")] +pub struct CacheInfo { + /// The timestamp of the most recent `ctime` of any relevant files, at the time of the build. + /// The timestamp will typically be the maximum of the `ctime` values of the `pyproject.toml`, + /// `setup.py`, and `setup.cfg` files, if they exist; however, users can provide additional + /// files to timestamp via the `cache-keys` field. + timestamp: Option, + /// The commit at which the distribution was built. + commit: Option, +} + +impl CacheInfo { + /// Return the [`CacheInfo`] for a given timestamp. + pub fn from_timestamp(timestamp: Timestamp) -> Self { + Self { + timestamp: Some(timestamp), + ..Self::default() + } + } + + /// Compute the cache info for a given path, which may be a file or a directory. 
+ pub fn from_path(path: &Path) -> io::Result { + let metadata = fs_err::metadata(path)?; + if metadata.is_file() { + Self::from_file(path) + } else { + Self::from_directory(path) + } + } + + /// Compute the cache info for a given directory. + pub fn from_directory(directory: &Path) -> io::Result { + let mut commit = None; + let mut timestamp = None; + + // Read the cache keys. + let cache_keys = + if let Ok(contents) = fs_err::read_to_string(directory.join("pyproject.toml")) { + if let Ok(pyproject_toml) = toml::from_str::(&contents) { + pyproject_toml + .tool + .and_then(|tool| tool.uv) + .and_then(|tool_uv| tool_uv.cache_keys) + } else { + None + } + } else { + None + }; + + // If no cache keys were defined, use the defaults. + let cache_keys = cache_keys.unwrap_or_else(|| { + vec![ + CacheKey::Path(directory.join("pyproject.toml")), + CacheKey::Path(directory.join("setup.py")), + CacheKey::Path(directory.join("setup.cfg")), + ] + }); + + // Incorporate any additional timestamps or VCS information. + for cache_key in &cache_keys { + match cache_key { + CacheKey::Path(file) | CacheKey::File { file } => { + timestamp = max( + timestamp, + file.metadata() + .ok() + .filter(std::fs::Metadata::is_file) + .as_ref() + .map(Timestamp::from_metadata), + ); + } + CacheKey::Git { git: true } => match CacheCommit::from_repository(directory) { + Ok(commit_info) => commit = Some(commit_info), + Err(err) => { + debug!("Failed to read the current commit: {err}"); + } + }, + CacheKey::Git { git: false } => {} + } + } + + Ok(Self { timestamp, commit }) + } + + /// Compute the cache info for a given file, assumed to be a binary or source distribution + /// represented as (e.g.) a `.whl` or `.tar.gz` archive. 
+ pub fn from_file(path: impl AsRef) -> Result { + let metadata = fs_err::metadata(path.as_ref())?; + let timestamp = Timestamp::from_metadata(&metadata); + Ok(Self { + timestamp: Some(timestamp), + ..Self::default() + }) + } + + pub fn is_empty(&self) -> bool { + self.timestamp.is_none() && self.commit.is_none() + } +} + +#[derive(Debug, serde::Deserialize)] +struct TimestampCommit { + timestamp: Option, + commit: Option, +} + +#[derive(Debug, serde::Deserialize)] +#[serde(untagged)] +enum CacheInfoWire { + /// For backwards-compatibility, enable deserializing [`CacheInfo`] structs that are solely + /// represented by a timestamp. + Timestamp(Timestamp), + /// A [`CacheInfo`] struct that includes both a timestamp and a commit. + TimestampCommit(TimestampCommit), +} + +impl From for CacheInfo { + fn from(wire: CacheInfoWire) -> Self { + match wire { + CacheInfoWire::Timestamp(timestamp) => Self { + timestamp: Some(timestamp), + ..Self::default() + }, + CacheInfoWire::TimestampCommit(TimestampCommit { timestamp, commit }) => { + Self { timestamp, commit } + } + } + } +} + +/// A `pyproject.toml` with an (optional) `[tool.uv]` section. 
+#[derive(Debug, Deserialize)] +#[serde(rename_all = "kebab-case")] +struct PyProjectToml { + tool: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "kebab-case")] +struct Tool { + uv: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "kebab-case")] +struct ToolUv { + cache_keys: Option>, +} + +#[derive(Debug, Clone, serde::Deserialize)] +#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] +#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)] +pub enum CacheKey { + /// Ex) `"Cargo.lock"` + Path(PathBuf), + /// Ex) `{ file = "Cargo.lock" }` + File { file: PathBuf }, + /// Ex) `{ git = true }` + Git { git: bool }, +} diff --git a/crates/uv-cache-info/src/commit_info.rs b/crates/uv-cache-info/src/commit_info.rs new file mode 100644 index 000000000000..0f1df788b9b7 --- /dev/null +++ b/crates/uv-cache-info/src/commit_info.rs @@ -0,0 +1,91 @@ +use std::path::{Path, PathBuf}; + +#[derive(Debug, thiserror::Error)] +pub(crate) enum CacheCommitError { + #[error("The repository at {0} is missing a `.git` directory")] + MissingGitDir(PathBuf), + #[error("The repository at {0} is missing a `HEAD` file")] + MissingHead(PathBuf), + #[error("The repository at {0} has an invalid reference: `{1}`")] + InvalidRef(PathBuf, String), + #[error("The discovered commit has an invalid length (expected 40 characters): `{0}`")] + WrongLength(String), + #[error("The discovered commit has an invalid character (expected hexadecimal): `{0}`")] + WrongDigit(String), + #[error(transparent)] + Io(#[from] std::io::Error), +} + +/// The current commit for a repository (i.e., a 40-character hexadecimal string). +#[derive(Default, Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +pub(crate) struct CacheCommit(String); + +impl CacheCommit { + /// Return the [`CacheCommit`] for the repository at the given path. 
+ pub(crate) fn from_repository(path: &Path) -> Result { + // Find the `.git` directory, searching through parent directories if necessary. + let git_dir = path + .ancestors() + .map(|ancestor| ancestor.join(".git")) + .find(|git_dir| git_dir.exists()) + .ok_or_else(|| CacheCommitError::MissingGitDir(path.to_path_buf()))?; + + let git_head_path = + git_head(&git_dir).ok_or_else(|| CacheCommitError::MissingHead(git_dir.clone()))?; + let git_head_contents = fs_err::read_to_string(git_head_path)?; + + // The contents are either a commit or a reference in the following formats + // - "" when the head is detached + // - "ref " when working on a branch + // If a commit, checking if the HEAD file has changed is sufficient + // If a ref, we need to add the head file for that ref to rebuild on commit + let mut git_ref_parts = git_head_contents.split_whitespace(); + let commit_or_ref = git_ref_parts.next().ok_or_else(|| { + CacheCommitError::InvalidRef(git_dir.clone(), git_head_contents.clone()) + })?; + let commit = if let Some(git_ref) = git_ref_parts.next() { + let git_ref_path = git_dir.join(git_ref); + fs_err::read_to_string(git_ref_path)? + } else { + commit_or_ref.to_string() + }; + + // The commit should be 40 hexadecimal characters. + if commit.len() != 40 { + return Err(CacheCommitError::WrongLength(commit)); + } + if commit.chars().any(|c| !c.is_ascii_hexdigit()) { + return Err(CacheCommitError::WrongDigit(commit)); + } + + Ok(Self(commit)) + } +} + +/// Return the path to the `HEAD` file of a Git repository, taking worktrees into account. +fn git_head(git_dir: &Path) -> Option { + // The typical case is a standard git repository. + let git_head_path = git_dir.join("HEAD"); + if git_head_path.exists() { + return Some(git_head_path); + } + if !git_dir.is_file() { + return None; + } + // If `.git/HEAD` doesn't exist and `.git` is actually a file, + // then let's try to attempt to read it as a worktree. 
If it's + // a worktree, then its contents will look like this, e.g.: + // + // gitdir: /home/andrew/astral/uv/main/.git/worktrees/pr2 + // + // And the HEAD file we want to watch will be at: + // + // /home/andrew/astral/uv/main/.git/worktrees/pr2/HEAD + let contents = fs_err::read_to_string(git_dir).ok()?; + let (label, worktree_path) = contents.split_once(':')?; + if label != "gitdir" { + return None; + } + let worktree_path = worktree_path.trim(); + Some(PathBuf::from(worktree_path)) +} diff --git a/crates/uv-cache-info/src/lib.rs b/crates/uv-cache-info/src/lib.rs new file mode 100644 index 000000000000..c09398c7c290 --- /dev/null +++ b/crates/uv-cache-info/src/lib.rs @@ -0,0 +1,6 @@ +pub use crate::cache_info::*; +pub use crate::timestamp::*; + +mod cache_info; +mod commit_info; +mod timestamp; diff --git a/crates/uv-cache/src/timestamp.rs b/crates/uv-cache-info/src/timestamp.rs similarity index 94% rename from crates/uv-cache/src/timestamp.rs rename to crates/uv-cache-info/src/timestamp.rs index aa5405366545..262a03fedf49 100644 --- a/crates/uv-cache/src/timestamp.rs +++ b/crates/uv-cache-info/src/timestamp.rs @@ -9,7 +9,7 @@ use std::path::Path; /// /// See: /// See: -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize)] +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize)] pub struct Timestamp(std::time::SystemTime); impl Timestamp { diff --git a/crates/uv-cache/Cargo.toml b/crates/uv-cache/Cargo.toml index 36851c883da7..3735f95ad26b 100644 --- a/crates/uv-cache/Cargo.toml +++ b/crates/uv-cache/Cargo.toml @@ -17,6 +17,7 @@ workspace = true cache-key = { workspace = true } distribution-types = { workspace = true } pypi-types = { workspace = true } +uv-cache-info = { workspace = true } uv-fs = { workspace = true, features = ["tokio"] } uv-normalize = { workspace = true } diff --git a/crates/uv-cache/src/by_timestamp.rs b/crates/uv-cache/src/by_timestamp.rs index 2a02d70a5b3f..b6f6b542a045 
100644 --- a/crates/uv-cache/src/by_timestamp.rs +++ b/crates/uv-cache/src/by_timestamp.rs @@ -1,6 +1,5 @@ use serde::{Deserialize, Serialize}; - -use crate::timestamp::Timestamp; +use uv_cache_info::Timestamp; #[derive(Deserialize, Serialize)] pub struct CachedByTimestamp { diff --git a/crates/uv-cache/src/lib.rs b/crates/uv-cache/src/lib.rs index cc1305526d7d..0ecf2f39e018 100644 --- a/crates/uv-cache/src/lib.rs +++ b/crates/uv-cache/src/lib.rs @@ -12,6 +12,7 @@ use tracing::debug; pub use archive::ArchiveId; use distribution_types::InstalledDist; use pypi_types::Metadata23; +use uv_cache_info::Timestamp; use uv_fs::{cachedir, directories}; use uv_normalize::PackageName; @@ -19,7 +20,6 @@ pub use crate::by_timestamp::CachedByTimestamp; #[cfg(feature = "clap")] pub use crate::cli::CacheArgs; pub use crate::removal::{rm_rf, Removal}; -pub use crate::timestamp::Timestamp; pub use crate::wheel::WheelCache; use crate::wheel::WheelCacheKind; @@ -28,7 +28,6 @@ mod by_timestamp; #[cfg(feature = "clap")] mod cli; mod removal; -mod timestamp; mod wheel; /// A [`CacheEntry`] which may or may not exist yet. 
diff --git a/crates/uv-configuration/Cargo.toml b/crates/uv-configuration/Cargo.toml index 1b0381b1d89b..843c2afe7400 100644 --- a/crates/uv-configuration/Cargo.toml +++ b/crates/uv-configuration/Cargo.toml @@ -13,12 +13,14 @@ license = { workspace = true } workspace = true [dependencies] +cache-key = { workspace = true } distribution-types = { workspace = true } pep508_rs = { workspace = true, features = ["schemars"] } platform-tags = { workspace = true } pypi-types = { workspace = true } uv-auth = { workspace = true } uv-cache = { workspace = true } +uv-cache-info = { workspace = true } uv-normalize = { workspace = true } clap = { workspace = true, features = ["derive"], optional = true } diff --git a/crates/uv-configuration/src/config_settings.rs b/crates/uv-configuration/src/config_settings.rs index f8f33e39ec0e..7ace0ac26b90 100644 --- a/crates/uv-configuration/src/config_settings.rs +++ b/crates/uv-configuration/src/config_settings.rs @@ -1,3 +1,4 @@ +use cache_key::CacheKeyHasher; use std::{ collections::{btree_map::Entry, BTreeMap}, str::FromStr, @@ -108,6 +109,16 @@ impl FromIterator for ConfigSettings { } impl ConfigSettings { + /// Returns the number of settings in the configuration. + pub fn len(&self) -> usize { + self.0.len() + } + + /// Returns `true` if the configuration contains no settings. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + /// Convert the settings to a string that can be passed directly to a PEP 517 build backend. 
pub fn escape_for_python(&self) -> String { serde_json::to_string(self).expect("Failed to serialize config settings") @@ -150,6 +161,18 @@ impl ConfigSettings { } } +impl cache_key::CacheKey for ConfigSettings { + fn cache_key(&self, state: &mut CacheKeyHasher) { + for (key, value) in &self.0 { + key.cache_key(state); + match value { + ConfigSettingValue::String(value) => value.cache_key(state), + ConfigSettingValue::List(values) => values.cache_key(state), + } + } + } +} + impl serde::Serialize for ConfigSettings { fn serialize(&self, serializer: S) -> Result { use serde::ser::SerializeMap; diff --git a/crates/uv-configuration/src/package_options.rs b/crates/uv-configuration/src/package_options.rs index 4fd8e5dad64b..babaa5dadd79 100644 --- a/crates/uv-configuration/src/package_options.rs +++ b/crates/uv-configuration/src/package_options.rs @@ -3,7 +3,8 @@ use pep508_rs::PackageName; use pypi_types::Requirement; use rustc_hash::FxHashMap; -use uv_cache::{Refresh, Timestamp}; +use uv_cache::Refresh; +use uv_cache_info::Timestamp; /// Whether to reinstall packages. 
#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)] diff --git a/crates/uv-dispatch/src/lib.rs b/crates/uv-dispatch/src/lib.rs index bb1e8446e743..52da7fdff277 100644 --- a/crates/uv-dispatch/src/lib.rs +++ b/crates/uv-dispatch/src/lib.rs @@ -140,6 +140,10 @@ impl<'a> BuildContext for BuildDispatch<'a> { self.build_options } + fn config_settings(&self) -> &ConfigSettings { + self.config_settings + } + fn sources(&self) -> SourceStrategy { self.sources } diff --git a/crates/uv-distribution/Cargo.toml b/crates/uv-distribution/Cargo.toml index 717df6e31fbe..4b798c025379 100644 --- a/crates/uv-distribution/Cargo.toml +++ b/crates/uv-distribution/Cargo.toml @@ -21,6 +21,7 @@ pep508_rs = { workspace = true } platform-tags = { workspace = true } pypi-types = { workspace = true } uv-cache = { workspace = true } +uv-cache-info = { workspace = true } uv-client = { workspace = true } uv-configuration = { workspace = true } uv-extract = { workspace = true } diff --git a/crates/uv-distribution/src/distribution_database.rs b/crates/uv-distribution/src/distribution_database.rs index a008cda2ed3e..0a67c6972e25 100644 --- a/crates/uv-distribution/src/distribution_database.rs +++ b/crates/uv-distribution/src/distribution_database.rs @@ -21,7 +21,8 @@ use distribution_types::{ }; use platform_tags::Tags; use pypi_types::HashDigest; -use uv_cache::{ArchiveId, ArchiveTimestamp, CacheBucket, CacheEntry, Timestamp, WheelCache}; +use uv_cache::{ArchiveId, CacheBucket, CacheEntry, WheelCache}; +use uv_cache_info::{CacheInfo, Timestamp}; use uv_client::{ CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient, }; @@ -187,6 +188,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: wheel.filename.clone(), + cache: CacheInfo::default(), }), Err(Error::Extract(err)) => { if err.is_http_streaming_unsupported() { @@ -217,6 +219,7 @@ impl<'a, 
Context: BuildContext> DistributionDatabase<'a, Context> { archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: wheel.filename.clone(), + cache: CacheInfo::default(), }) } Err(err) => Err(err), @@ -248,6 +251,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: wheel.filename.clone(), + cache: CacheInfo::default(), }), Err(Error::Client(err)) if err.is_http_streaming_unsupported() => { warn!( @@ -271,6 +275,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: wheel.filename.clone(), + cache: CacheInfo::default(), }) } Err(err) => Err(err), @@ -325,6 +330,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive, filename: built_wheel.filename, hashes: built_wheel.hashes, + cache: built_wheel.cache_info, }); } Err(err) if err.kind() == io::ErrorKind::NotFound => {} @@ -341,6 +347,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self.build_context.cache().archive(&id), hashes: built_wheel.hashes, filename: built_wheel.filename, + cache: built_wheel.cache_info, }) } @@ -724,7 +731,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { hashes: HashPolicy<'_>, ) -> Result { // Determine the last-modified time of the wheel. - let modified = ArchiveTimestamp::from_file(path).map_err(Error::CacheRead)?; + let modified = Timestamp::from_path(path).map_err(Error::CacheRead)?; // Attempt to read the archive pointer from the cache. 
let pointer_entry = wheel_entry.with_file(format!("{}.rev", filename.stem())); @@ -743,6 +750,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: filename.clone(), + cache: CacheInfo::from_timestamp(modified), }) } else if hashes.is_none() { // Otherwise, unzip the wheel. @@ -750,7 +758,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { // Write the archive pointer to the cache. let pointer = LocalArchivePointer { - timestamp: modified.timestamp(), + timestamp: modified, archive: archive.clone(), }; pointer.write_to(&pointer_entry).await?; @@ -760,6 +768,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: filename.clone(), + cache: CacheInfo::from_timestamp(modified), }) } else { // If necessary, compute the hashes of the wheel. @@ -795,7 +804,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { // Write the archive pointer to the cache. let pointer = LocalArchivePointer { - timestamp: modified.timestamp(), + timestamp: modified, archive: archive.clone(), }; pointer.write_to(&pointer_entry).await?; @@ -805,6 +814,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { archive: self.build_context.cache().archive(&archive.id), hashes: archive.hashes, filename: filename.clone(), + cache: CacheInfo::from_timestamp(modified), }) } } @@ -960,6 +970,11 @@ impl HttpArchivePointer { pub fn into_archive(self) -> Archive { self.archive } + + /// Return the [`CacheInfo`] from the pointer. + pub fn to_cache_info(&self) -> CacheInfo { + CacheInfo::default() + } } /// A pointer to an archive in the cache, fetched from a local path. @@ -989,12 +1004,17 @@ impl LocalArchivePointer { } /// Returns `true` if the archive is up-to-date with the given modified timestamp. 
- pub fn is_up_to_date(&self, modified: ArchiveTimestamp) -> bool { - self.timestamp == modified.timestamp() + pub fn is_up_to_date(&self, modified: Timestamp) -> bool { + self.timestamp == modified } /// Return the [`Archive`] from the pointer. pub fn into_archive(self) -> Archive { self.archive } + + /// Return the [`CacheInfo`] from the pointer. + pub fn to_cache_info(&self) -> CacheInfo { + CacheInfo::from_timestamp(self.timestamp) + } } diff --git a/crates/uv-distribution/src/download.rs b/crates/uv-distribution/src/download.rs index 68db0b722de7..94692ed2d027 100644 --- a/crates/uv-distribution/src/download.rs +++ b/crates/uv-distribution/src/download.rs @@ -1,10 +1,10 @@ use std::path::{Path, PathBuf}; +use crate::Error; use distribution_filename::WheelFilename; use distribution_types::{CachedDist, Dist, Hashed}; use pypi_types::{HashDigest, Metadata23}; - -use crate::Error; +use uv_cache_info::CacheInfo; /// A locally available wheel. #[derive(Debug, Clone)] @@ -16,6 +16,8 @@ pub struct LocalWheel { /// The canonicalized path in the cache directory to which the wheel was downloaded. /// Typically, a directory within the archive bucket. pub(crate) archive: PathBuf, + /// The cache index of the wheel. + pub(crate) cache: CacheInfo, /// The computed hashes of the wheel. pub(crate) hashes: Vec, } @@ -51,7 +53,13 @@ impl Hashed for LocalWheel { /// Convert a [`LocalWheel`] into a [`CachedDist`]. 
impl From for CachedDist { fn from(wheel: LocalWheel) -> CachedDist { - CachedDist::from_remote(wheel.dist, wheel.filename, wheel.hashes, wheel.archive) + CachedDist::from_remote( + wheel.dist, + wheel.filename, + wheel.hashes, + wheel.cache, + wheel.archive, + ) } } diff --git a/crates/uv-distribution/src/index/built_wheel_index.rs b/crates/uv-distribution/src/index/built_wheel_index.rs index a621744d94b4..a7e83f7ecc7f 100644 --- a/crates/uv-distribution/src/index/built_wheel_index.rs +++ b/crates/uv-distribution/src/index/built_wheel_index.rs @@ -1,15 +1,15 @@ +use crate::index::cached_wheel::CachedWheel; +use crate::source::{HttpRevisionPointer, LocalRevisionPointer, HTTP_REVISION, LOCAL_REVISION}; +use crate::Error; use distribution_types::{ DirectUrlSourceDist, DirectorySourceDist, GitSourceDist, Hashed, PathSourceDist, }; use platform_tags::Tags; -use uv_cache::{ArchiveTimestamp, Cache, CacheBucket, CacheShard, WheelCache}; +use uv_cache::{Cache, CacheBucket, CacheShard, WheelCache}; +use uv_cache_info::CacheInfo; use uv_fs::symlinks; use uv_types::HashStrategy; -use crate::index::cached_wheel::CachedWheel; -use crate::source::{HttpRevisionPointer, LocalRevisionPointer, HTTP_REVISION, LOCAL_REVISION}; -use crate::Error; - /// A local index of built distributions for a specific source distribution. #[derive(Debug)] pub struct BuiltWheelIndex<'a> { @@ -51,9 +51,10 @@ impl<'a> BuiltWheelIndex<'a> { return Ok(None); } - Ok(self.find(&cache_shard.shard(revision.id()))) - } + let cache_shard = cache_shard.shard(revision.id()); + Ok(self.find(&cache_shard)) + } /// Return the most compatible [`CachedWheel`] for a given source distribution at a local path. pub fn path(&self, source_dist: &PathSourceDist) -> Result, Error> { let cache_shard = self.cache.shard( @@ -67,12 +68,10 @@ impl<'a> BuiltWheelIndex<'a> { return Ok(None); }; - // Determine the last-modified time of the source distribution. 
- let modified = - ArchiveTimestamp::from_file(&source_dist.install_path).map_err(Error::CacheRead)?; - // If the distribution is stale, omit it from the index. - if !pointer.is_up_to_date(modified) { + let cache_info = + CacheInfo::from_file(&source_dist.install_path).map_err(Error::CacheRead)?; + if cache_info != *pointer.cache_info() { return Ok(None); } @@ -82,7 +81,11 @@ impl<'a> BuiltWheelIndex<'a> { return Ok(None); } - Ok(self.find(&cache_shard.shard(revision.id()))) + let cache_shard = cache_shard.shard(revision.id()); + + Ok(self + .find(&cache_shard) + .map(|wheel| wheel.with_cache_info(cache_info))) } /// Return the most compatible [`CachedWheel`] for a given source distribution built from a @@ -106,17 +109,11 @@ impl<'a> BuiltWheelIndex<'a> { return Ok(None); }; - // Determine the last-modified time of the source distribution. - let Some(modified) = ArchiveTimestamp::from_source_tree(&source_dist.install_path) - .map_err(Error::CacheRead)? - else { - return Err(Error::DirWithoutEntrypoint( - source_dist.install_path.clone(), - )); - }; - // If the distribution is stale, omit it from the index. - if !pointer.is_up_to_date(modified) { + let cache_info = + CacheInfo::from_directory(&source_dist.install_path).map_err(Error::CacheRead)?; + + if cache_info != *pointer.cache_info() { return Ok(None); } @@ -126,7 +123,11 @@ impl<'a> BuiltWheelIndex<'a> { return Ok(None); } - Ok(self.find(&cache_shard.shard(revision.id()))) + let cache_shard = cache_shard.shard(revision.id()); + + Ok(self + .find(&cache_shard) + .map(|wheel| wheel.with_cache_info(cache_info))) } /// Return the most compatible [`CachedWheel`] for a given source distribution at a git URL. 
diff --git a/crates/uv-distribution/src/index/cached_wheel.rs b/crates/uv-distribution/src/index/cached_wheel.rs index 9b80db305389..461df70da50b 100644 --- a/crates/uv-distribution/src/index/cached_wheel.rs +++ b/crates/uv-distribution/src/index/cached_wheel.rs @@ -1,13 +1,13 @@ use std::path::Path; +use crate::archive::Archive; +use crate::{HttpArchivePointer, LocalArchivePointer}; use distribution_filename::WheelFilename; use distribution_types::{CachedDirectUrlDist, CachedRegistryDist, Hashed}; use pep508_rs::VerbatimUrl; use pypi_types::HashDigest; use uv_cache::{Cache, CacheBucket, CacheEntry}; - -use crate::archive::Archive; -use crate::{HttpArchivePointer, LocalArchivePointer}; +use uv_cache_info::CacheInfo; #[derive(Debug, Clone)] pub struct CachedWheel { @@ -17,6 +17,8 @@ pub struct CachedWheel { pub entry: CacheEntry, /// The [`HashDigest`]s for the wheel. pub hashes: Vec, + /// The [`CacheInfo`] for the wheel. + pub cache_info: CacheInfo, } impl CachedWheel { @@ -32,10 +34,12 @@ impl CachedWheel { let archive = path.canonicalize().ok()?; let entry = CacheEntry::from_path(archive); let hashes = Vec::new(); + let cache_info = CacheInfo::default(); Some(Self { filename, entry, hashes, + cache_info, }) } @@ -45,6 +49,7 @@ impl CachedWheel { filename: self.filename, path: self.entry.into_path_buf(), hashes: self.hashes, + cache_info: self.cache_info, } } @@ -57,6 +62,7 @@ impl CachedWheel { editable: false, r#virtual: false, hashes: self.hashes, + cache_info: self.cache_info, } } @@ -69,6 +75,7 @@ impl CachedWheel { editable: true, r#virtual: false, hashes: self.hashes, + cache_info: self.cache_info, } } @@ -81,6 +88,7 @@ impl CachedWheel { editable: false, r#virtual: true, hashes: self.hashes, + cache_info: self.cache_info, } } @@ -94,14 +102,17 @@ impl CachedWheel { // Read the pointer. 
let pointer = HttpArchivePointer::read_from(path).ok()??; + let cache_info = pointer.to_cache_info(); let Archive { id, hashes } = pointer.into_archive(); - // Convert to a cached wheel. let entry = cache.entry(CacheBucket::Archive, "", id); + + // Convert to a cached wheel. Some(Self { filename, entry, hashes, + cache_info, }) } @@ -115,6 +126,7 @@ impl CachedWheel { // Read the pointer. let pointer = LocalArchivePointer::read_from(path).ok()??; + let cache_info = pointer.to_cache_info(); let Archive { id, hashes } = pointer.into_archive(); // Convert to a cached wheel. @@ -123,8 +135,15 @@ impl CachedWheel { filename, entry, hashes, + cache_info, }) } + + #[must_use] + pub fn with_cache_info(mut self, cache_info: CacheInfo) -> Self { + self.cache_info = cache_info; + self + } } impl Hashed for CachedWheel { diff --git a/crates/uv-distribution/src/source/built_wheel_metadata.rs b/crates/uv-distribution/src/source/built_wheel_metadata.rs index 664e32f8fc00..7a14f0d9805e 100644 --- a/crates/uv-distribution/src/source/built_wheel_metadata.rs +++ b/crates/uv-distribution/src/source/built_wheel_metadata.rs @@ -6,6 +6,7 @@ use distribution_types::Hashed; use platform_tags::Tags; use pypi_types::HashDigest; use uv_cache::CacheShard; +use uv_cache_info::CacheInfo; use uv_fs::files; /// The information about the wheel we either just built or got from the cache. @@ -19,6 +20,8 @@ pub(crate) struct BuiltWheelMetadata { pub(crate) filename: WheelFilename, /// The computed hashes of the source distribution from which the wheel was built. pub(crate) hashes: Vec, + /// The cache information for the underlying source distribution. + pub(crate) cache_info: CacheInfo, } impl BuiltWheelMetadata { @@ -43,11 +46,11 @@ impl BuiltWheelMetadata { target: cache_shard.join(filename.stem()), path, filename, + cache_info: CacheInfo::default(), hashes: vec![], }) } - /// Set the computed hashes of the wheel. 
#[must_use] pub(crate) fn with_hashes(mut self, hashes: Vec) -> Self { self.hashes = hashes; diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs index 4c7a421a9ec7..483e4619b587 100644 --- a/crates/uv-distribution/src/source/mod.rs +++ b/crates/uv-distribution/src/source/mod.rs @@ -5,26 +5,29 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; -use fs_err::tokio as fs; -use futures::{FutureExt, TryStreamExt}; -use reqwest::Response; -use tokio_util::compat::FuturesAsyncReadCompatExt; -use tracing::{debug, info_span, instrument, Instrument}; -use url::Url; -use zip::ZipArchive; - +use crate::distribution_database::ManagedClient; +use crate::error::Error; +use crate::metadata::{ArchiveMetadata, Metadata}; +use crate::reporter::Facade; +use crate::source::built_wheel_metadata::BuiltWheelMetadata; +use crate::source::revision::Revision; +use crate::{Reporter, RequiresDist}; use distribution_filename::{SourceDistExtension, WheelFilename}; use distribution_types::{ BuildableSource, DirectorySourceUrl, FileLocation, GitSourceUrl, HashPolicy, Hashed, PathSourceUrl, RemoteSource, SourceDist, SourceUrl, }; +use fs_err::tokio as fs; +use futures::{FutureExt, TryStreamExt}; use install_wheel_rs::metadata::read_archive_metadata; use platform_tags::Tags; use pypi_types::{HashDigest, Metadata12, Metadata23, RequiresTxt}; -use uv_cache::{ - ArchiveTimestamp, Cache, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Removal, - Timestamp, WheelCache, -}; +use reqwest::Response; +use tokio_util::compat::FuturesAsyncReadCompatExt; +use tracing::{debug, info_span, instrument, Instrument}; +use url::Url; +use uv_cache::{Cache, CacheBucket, CacheEntry, CacheShard, Removal, WheelCache}; +use uv_cache_info::CacheInfo; use uv_client::{ CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient, }; @@ -32,14 +35,7 @@ use uv_configuration::{BuildKind, BuildOutput}; use uv_extract::hash::Hasher; 
use uv_fs::{rename_with_retry, write_atomic, LockedFile}; use uv_types::{BuildContext, SourceBuildTrait}; - -use crate::distribution_database::ManagedClient; -use crate::error::Error; -use crate::metadata::{ArchiveMetadata, Metadata}; -use crate::reporter::Facade; -use crate::source::built_wheel_metadata::BuiltWheelMetadata; -use crate::source::revision::Revision; -use crate::{Reporter, RequiresDist}; +use zip::ZipArchive; mod built_wheel_metadata; mod revision; @@ -463,6 +459,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { target: cache_shard.join(wheel_filename.stem()), filename: wheel_filename, hashes: revision.into_hashes(), + cache_info: CacheInfo::default(), }) } @@ -522,7 +519,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { }); } - // Otherwise, we either need to build the metadata or the wheel. + // Otherwise, we need to build the metadata. // If the backend supports `prepare_metadata_for_build_wheel`, use it. if let Some(metadata) = self .build_metadata(source, source_dist_entry.path(), subdirectory) @@ -654,7 +651,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { let _lock = lock_shard(cache_shard).await?; // Fetch the revision for the source distribution. - let revision = self + let LocalRevisionPointer { + cache_info, + revision, + } = self .archive_revision(source, resource, cache_shard, hashes) .await?; @@ -705,6 +705,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { target: cache_shard.join(filename.stem()), filename, hashes: revision.into_hashes(), + cache_info, }) } @@ -722,7 +723,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { let _lock = lock_shard(cache_shard).await?; // Fetch the revision for the source distribution. - let revision = self + let LocalRevisionPointer { revision, ..
} = self .archive_revision(source, resource, cache_shard, hashes) .await?; @@ -814,14 +815,14 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { resource: &PathSourceUrl<'_>, cache_shard: &CacheShard, hashes: HashPolicy<'_>, - ) -> Result { + ) -> Result { // Verify that the archive exists. if !resource.path.is_file() { return Err(Error::NotFound(resource.url.clone())); } // Determine the last-modified time of the source distribution. - let modified = ArchiveTimestamp::from_file(&resource.path).map_err(Error::CacheRead)?; + let cache_info = CacheInfo::from_file(&resource.path).map_err(Error::CacheRead)?; // Read the existing metadata from the cache. let revision_entry = cache_shard.entry(LOCAL_REVISION); @@ -829,10 +830,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // If the revision already exists, return it. There's no need to check for freshness, since // we use an exact timestamp. if let Some(pointer) = LocalRevisionPointer::read_from(&revision_entry)? { - if pointer.is_up_to_date(modified) { - let revision = pointer.into_revision(); - if revision.has_digests(hashes) { - return Ok(revision); + if *pointer.cache_info() == cache_info { + if pointer.revision().has_digests(hashes) { + return Ok(pointer); } } } @@ -846,20 +846,18 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { let hashes = self .persist_archive(&resource.path, resource.ext, entry.path(), hashes) .await?; + + // Include the hashes in the revision; the cache info is stored on the pointer. let revision = revision.with_hashes(hashes); // Persist the revision.
- write_atomic( - revision_entry.path(), - rmp_serde::to_vec(&CachedByTimestamp { - timestamp: modified.timestamp(), - data: revision.clone(), - })?, - ) - .await - .map_err(Error::CacheWrite)?; + let pointer = LocalRevisionPointer { + cache_info, + revision, + }; + pointer.write_to(&revision_entry).await?; - Ok(revision) + Ok(pointer) } /// Build a source distribution from a local source tree (i.e., directory), either editable or @@ -888,7 +886,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { let _lock = lock_shard(&cache_shard).await?; // Fetch the revision for the source distribution. - let revision = self + let LocalRevisionPointer { + cache_info, + revision, + } = self .source_tree_revision(source, resource, &cache_shard) .await?; @@ -927,7 +928,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { path: cache_shard.join(&disk_filename), target: cache_shard.join(filename.stem()), filename, - hashes: vec![], + hashes: revision.into_hashes(), + cache_info, }) } @@ -947,6 +949,19 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { return Err(Error::HashesNotSupportedSourceTree(source.to_string())); } + if let Some(metadata) = + Self::read_static_metadata(source, &resource.install_path, None).await? + { + return Ok(ArchiveMetadata::from( + Metadata::from_workspace( + metadata, + resource.install_path.as_ref(), + self.build_context.sources(), + ) + .await?, + )); + } + let cache_shard = self.build_context.cache().shard( CacheBucket::SourceDistributions, if resource.editable { @@ -959,7 +974,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { let _lock = lock_shard(&cache_shard).await?; // Fetch the revision for the source distribution. - let revision = self + let LocalRevisionPointer { revision, .. 
} = self .source_tree_revision(source, resource, &cache_shard) .await?; @@ -967,19 +982,6 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // freshness, since entries have to be fresher than the revision itself. let cache_shard = cache_shard.shard(revision.id()); - if let Some(metadata) = - Self::read_static_metadata(source, &resource.install_path, None).await? - { - return Ok(ArchiveMetadata::from( - Metadata::from_workspace( - metadata, - resource.install_path.as_ref(), - self.build_context.sources(), - ) - .await?, - )); - } - // If the cache contains compatible metadata, return it. let metadata_entry = cache_shard.entry(METADATA); if let Some(metadata) = read_cached_metadata(&metadata_entry).await? { @@ -1055,20 +1057,15 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { source: &BuildableSource<'_>, resource: &DirectorySourceUrl<'_>, cache_shard: &CacheShard, - ) -> Result { + ) -> Result { // Verify that the source tree exists. if !resource.install_path.is_dir() { return Err(Error::NotFound(resource.url.clone())); } // Determine the last-modified time of the source distribution. - let Some(modified) = - ArchiveTimestamp::from_source_tree(&resource.install_path).map_err(Error::CacheRead)? - else { - return Err(Error::DirWithoutEntrypoint( - resource.install_path.to_path_buf(), - )); - }; + let cache_info = + CacheInfo::from_directory(&resource.install_path).map_err(Error::CacheRead)?; // Read the existing metadata from the cache. let entry = cache_shard.entry(LOCAL_REVISION); @@ -1082,8 +1079,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { .is_fresh() { if let Some(pointer) = LocalRevisionPointer::read_from(&entry)? { - if pointer.timestamp == modified.timestamp() { - return Ok(pointer.into_revision()); + if *pointer.cache_info() == cache_info { + return Ok(pointer); } } } @@ -1091,12 +1088,12 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Otherwise, we need to create a new revision. 
let revision = Revision::new(); let pointer = LocalRevisionPointer { - timestamp: modified.timestamp(), - revision: revision.clone(), + cache_info, + revision, }; pointer.write_to(&entry).await?; - Ok(revision) + Ok(pointer) } /// Build a source distribution from a Git repository. @@ -1130,6 +1127,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { CacheBucket::SourceDistributions, WheelCache::Git(resource.url, &git_sha.to_short_string()).root(), ); + let metadata_entry = cache_shard.entry(METADATA); let _lock = lock_shard(&cache_shard).await?; @@ -1154,7 +1152,6 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { } // Store the metadata. - let metadata_entry = cache_shard.entry(METADATA); write_atomic(metadata_entry.path(), rmp_serde::to_vec(&metadata)?) .await .map_err(Error::CacheWrite)?; @@ -1164,6 +1161,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { target: cache_shard.join(filename.stem()), filename, hashes: vec![], + cache_info: CacheInfo::default(), }) } @@ -1200,6 +1198,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { CacheBucket::SourceDistributions, WheelCache::Git(resource.url, &git_sha.to_short_string()).root(), ); + let metadata_entry = cache_shard.entry(METADATA); let _lock = lock_shard(&cache_shard).await?; @@ -1218,8 +1217,6 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { } // If the cache contains compatible metadata, return it. - let metadata_entry = cache_shard.entry(METADATA); - if self .build_context .cache() @@ -1737,7 +1734,7 @@ impl HttpRevisionPointer { /// Encoded with `MsgPack`, and represented on disk by a `.rev` file. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub(crate) struct LocalRevisionPointer { - timestamp: Timestamp, + cache_info: CacheInfo, revision: Revision, } @@ -1763,12 +1760,17 @@ impl LocalRevisionPointer { .map_err(Error::CacheWrite) } - /// Returns `true` if the revision is up-to-date with the given modified timestamp. 
- pub(crate) fn is_up_to_date(&self, modified: ArchiveTimestamp) -> bool { - self.timestamp == modified.timestamp() + /// Return the [`CacheInfo`] for the pointer. + pub(crate) fn cache_info(&self) -> &CacheInfo { + &self.cache_info } - /// Return the [`Revision`] from the pointer. + /// Return the [`Revision`] for the pointer. + pub(crate) fn revision(&self) -> &Revision { + &self.revision + } + + /// Consume the pointer, returning its [`Revision`]. pub(crate) fn into_revision(self) -> Revision { self.revision } diff --git a/crates/uv-git/Cargo.toml b/crates/uv-git/Cargo.toml index 916d6cffa993..57a912e9cc75 100644 --- a/crates/uv-git/Cargo.toml +++ b/crates/uv-git/Cargo.toml @@ -27,4 +27,4 @@ serde = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } -url = { workspace = true } \ No newline at end of file +url = { workspace = true } diff --git a/crates/uv-installer/Cargo.toml b/crates/uv-installer/Cargo.toml index 8f1663a03004..c6ee2679710c 100644 --- a/crates/uv-installer/Cargo.toml +++ b/crates/uv-installer/Cargo.toml @@ -22,6 +22,7 @@ pep508_rs = { workspace = true } platform-tags = { workspace = true } pypi-types = { workspace = true } uv-cache = { workspace = true } +uv-cache-info = { workspace = true } uv-configuration = { workspace = true } uv-distribution = { workspace = true } uv-extract = { workspace = true } diff --git a/crates/uv-installer/src/installer.rs b/crates/uv-installer/src/installer.rs index 5abdfd887057..191e536a11ac 100644 --- a/crates/uv-installer/src/installer.rs +++ b/crates/uv-installer/src/installer.rs @@ -149,6 +149,11 @@ fn install( .map(pypi_types::DirectUrl::try_from) .transpose()?
.as_ref(), + if wheel.cache_info().is_empty() { + None + } else { + Some(wheel.cache_info()) + }, installer_name.as_deref(), link_mode, &locks, diff --git a/crates/uv-installer/src/plan.rs b/crates/uv-installer/src/plan.rs index 0bbe89fa2f96..2c5c73d38cd4 100644 --- a/crates/uv-installer/src/plan.rs +++ b/crates/uv-installer/src/plan.rs @@ -13,7 +13,8 @@ use distribution_types::{ }; use platform_tags::Tags; use pypi_types::{Requirement, RequirementSource, ResolverMarkerEnvironment}; -use uv_cache::{ArchiveTimestamp, Cache, CacheBucket, WheelCache}; +use uv_cache::{Cache, CacheBucket, WheelCache}; +use uv_cache_info::{CacheInfo, Timestamp}; use uv_configuration::{BuildOptions, Reinstall}; use uv_distribution::{ BuiltWheelIndex, HttpArchivePointer, LocalArchivePointer, RegistryWheelIndex, @@ -93,8 +94,7 @@ impl<'a> Planner<'a> { } } - // Check if the package should be reinstalled. A reinstall involves (1) purging any - // cached distributions, and (2) marking any installed distributions as extraneous. + // Check if the package should be reinstalled. let reinstall = match reinstall { Reinstall::None => false, Reinstall::All => true, @@ -207,6 +207,7 @@ impl<'a> Planner<'a> { wheel.filename, wheel.url, archive.hashes, + CacheInfo::default(), cache.archive(&archive.id), ); @@ -361,14 +362,16 @@ impl<'a> Planner<'a> { .entry(format!("{}.rev", wheel.filename.stem())); if let Some(pointer) = LocalArchivePointer::read_from(&cache_entry)? 
{ - let timestamp = ArchiveTimestamp::from_file(&wheel.install_path)?; + let timestamp = Timestamp::from_path(&wheel.install_path)?; if pointer.is_up_to_date(timestamp) { + let cache_info = pointer.to_cache_info(); let archive = pointer.into_archive(); if archive.satisfies(hasher.get(&wheel)) { let cached_dist = CachedDirectUrlDist::from_url( wheel.filename, wheel.url, archive.hashes, + cache_info, cache.archive(&archive.id), ); diff --git a/crates/uv-installer/src/satisfies.rs b/crates/uv-installer/src/satisfies.rs index c91f95d01c3c..c6d8afe752dc 100644 --- a/crates/uv-installer/src/satisfies.rs +++ b/crates/uv-installer/src/satisfies.rs @@ -7,7 +7,7 @@ use url::Url; use cache_key::{CanonicalUrl, RepositoryUrl}; use distribution_types::{InstalledDirectUrlDist, InstalledDist}; use pypi_types::{DirInfo, DirectUrl, RequirementSource, VcsInfo, VcsKind}; -use uv_cache::{ArchiveTarget, ArchiveTimestamp}; +use uv_cache_info::CacheInfo; #[derive(Debug, Copy, Clone)] pub(crate) enum RequirementSatisfaction { @@ -50,6 +50,7 @@ impl RequirementSatisfaction { let InstalledDist::Url(InstalledDirectUrlDist { direct_url, editable, + cache_info, .. }) = &distribution else { @@ -81,10 +82,10 @@ impl RequirementSatisfaction { // If the requirement came from a local path, check freshness. if requested_url.scheme() == "file" { if let Ok(archive) = requested_url.to_file_path() { - if !ArchiveTimestamp::up_to_date_with( - &archive, - ArchiveTarget::Install(distribution), - )? { + let Some(cache_info) = cache_info.as_ref() else { + return Ok(Self::OutOfDate); + }; + if *cache_info != CacheInfo::from_path(&archive)? { return Ok(Self::OutOfDate); } } @@ -153,7 +154,11 @@ impl RequirementSatisfaction { ext: _, url: _, } => { - let InstalledDist::Url(InstalledDirectUrlDist { direct_url, .. }) = &distribution + let InstalledDist::Url(InstalledDirectUrlDist { + direct_url, + cache_info, + .. 
+ }) = &distribution else { return Ok(Self::Mismatch); }; @@ -184,11 +189,10 @@ impl RequirementSatisfaction { return Ok(Self::Mismatch); } - if !ArchiveTimestamp::up_to_date_with( - requested_path, - ArchiveTarget::Install(distribution), - )? { - trace!("Installed package is out of date"); + let Some(cache_info) = cache_info.as_ref() else { + return Ok(Self::OutOfDate); + }; + if *cache_info != CacheInfo::from_path(requested_path)? { return Ok(Self::OutOfDate); } @@ -200,7 +204,11 @@ impl RequirementSatisfaction { r#virtual: _, url: _, } => { - let InstalledDist::Url(InstalledDirectUrlDist { direct_url, .. }) = &distribution + let InstalledDist::Url(InstalledDirectUrlDist { + direct_url, + cache_info, + .. + }) = &distribution else { return Ok(Self::Mismatch); }; @@ -242,11 +250,10 @@ impl RequirementSatisfaction { return Ok(Self::Mismatch); } - if !ArchiveTimestamp::up_to_date_with( - requested_path, - ArchiveTarget::Install(distribution), - )? { - trace!("Installed package is out of date"); + let Some(cache_info) = cache_info.as_ref() else { + return Ok(Self::OutOfDate); + }; + if *cache_info != CacheInfo::from_path(requested_path)? 
{ return Ok(Self::OutOfDate); } diff --git a/crates/uv-python/Cargo.toml b/crates/uv-python/Cargo.toml index c3c17a6ce952..75ecc9aceec1 100644 --- a/crates/uv-python/Cargo.toml +++ b/crates/uv-python/Cargo.toml @@ -21,6 +21,7 @@ pep508_rs = { workspace = true } platform-tags = { workspace = true } pypi-types = { workspace = true } uv-cache = { workspace = true } +uv-cache-info = { workspace = true } uv-client = { workspace = true } uv-extract = { workspace = true } uv-fs = { workspace = true } diff --git a/crates/uv-python/src/interpreter.rs b/crates/uv-python/src/interpreter.rs index 0f7d4aead09d..40a9d0397c81 100644 --- a/crates/uv-python/src/interpreter.rs +++ b/crates/uv-python/src/interpreter.rs @@ -18,7 +18,8 @@ use pep508_rs::{MarkerEnvironment, StringVersion}; use platform_tags::Platform; use platform_tags::{Tags, TagsError}; use pypi_types::{ResolverMarkerEnvironment, Scheme}; -use uv_cache::{Cache, CacheBucket, CachedByTimestamp, Freshness, Timestamp}; +use uv_cache::{Cache, CacheBucket, CachedByTimestamp, Freshness}; +use uv_cache_info::Timestamp; use uv_fs::{write_atomic_sync, PythonExt, Simplified}; use crate::implementation::LenientImplementationName; diff --git a/crates/uv-settings/Cargo.toml b/crates/uv-settings/Cargo.toml index ea893bde5b60..6be4b49b758f 100644 --- a/crates/uv-settings/Cargo.toml +++ b/crates/uv-settings/Cargo.toml @@ -17,6 +17,7 @@ distribution-types = { workspace = true, features = ["schemars"] } install-wheel-rs = { workspace = true, features = ["schemars", "clap"] } pep508_rs = { workspace = true } pypi-types = { workspace = true } +uv-cache-info = { workspace = true, features = ["schemars"] } uv-configuration = { workspace = true, features = ["schemars", "clap"] } uv-fs = { workspace = true } uv-macros = { workspace = true } diff --git a/crates/uv-settings/src/settings.rs b/crates/uv-settings/src/settings.rs index 84fcfec22fd6..48031e81d74a 100644 --- a/crates/uv-settings/src/settings.rs +++ 
b/crates/uv-settings/src/settings.rs @@ -6,6 +6,7 @@ use distribution_types::{FlatIndexLocation, IndexUrl}; use install_wheel_rs::linker::LinkMode; use pep508_rs::Requirement; use pypi_types::{SupportedEnvironments, VerbatimParsedUrl}; +use uv_cache_info::CacheKey; use uv_configuration::{ ConfigSettings, IndexStrategy, KeyringProviderType, PackageNameSpecifier, TargetTriple, TrustedHost, @@ -42,6 +43,38 @@ pub struct Options { #[option_group] pub pip: Option, + /// The keys to consider when caching builds for the project. + /// + /// Cache keys enable you to specify the files or directories that should trigger a rebuild when + /// modified. By default, uv will rebuild a project whenever the `pyproject.toml`, `setup.py`, + /// or `setup.cfg` files in the project directory are modified, i.e.: + /// + /// ```toml + /// cache-keys = [{ file = "pyproject.toml" }, { file = "setup.py" }, { file = "setup.cfg" }] + /// ``` + /// + /// As an example: if a project uses dynamic metadata to read its dependencies from a + /// `requirements.txt` file, you can specify `cache-keys = [{ file = "requirements.txt" }, { file = "pyproject.toml" }]` + /// to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in + /// addition to watching the `pyproject.toml`). + /// + /// Cache keys can also include version control information. For example, if a project uses + /// `setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = "pyproject.toml" }]` + /// to include the current Git commit hash in the cache key (in addition to the + /// `pyproject.toml`). + /// + /// Cache keys only affect the project defined by the `pyproject.toml` in which they're + /// specified (as opposed to, e.g., affecting all members in a workspace). 
+ #[option( + default = r#"[{ file = "pyproject.toml" }, { file = "setup.py" }, { file = "setup.cfg" }]"#, + value_type = "list[dict]", + example = r#" + cache-keys = [{ file = "pyproject.toml" }, { file = "requirements.txt" }, { git = true }] + "# + )] + #[serde(default, skip_serializing)] + cache_keys: Option>, + // NOTE(charlie): These fields are shared with `ToolUv` in // `crates/uv-workspace/src/pyproject.rs`, and the documentation lives on that struct. #[cfg_attr(feature = "schemars", schemars(skip))] diff --git a/crates/uv-types/src/traits.rs b/crates/uv-types/src/traits.rs index 1d4b029992da..f8e18f737746 100644 --- a/crates/uv-types/src/traits.rs +++ b/crates/uv-types/src/traits.rs @@ -9,7 +9,7 @@ use distribution_types::{ use pep508_rs::PackageName; use pypi_types::Requirement; use uv_cache::Cache; -use uv_configuration::{BuildKind, BuildOptions, BuildOutput, SourceStrategy}; +use uv_configuration::{BuildKind, BuildOptions, BuildOutput, ConfigSettings, SourceStrategy}; use uv_git::GitResolver; use uv_python::PythonEnvironment; @@ -68,6 +68,9 @@ pub trait BuildContext { /// This method exists to avoid fetching source distributions if we know we can't build them. fn build_options(&self) -> &BuildOptions; + /// The [`ConfigSettings`] used to build distributions. + fn config_settings(&self) -> &ConfigSettings; + /// Whether to incorporate `tool.uv.sources` when resolving requirements. 
fn sources(&self) -> SourceStrategy; diff --git a/crates/uv/Cargo.toml b/crates/uv/Cargo.toml index 5eca6b33e88a..738a24a290ef 100644 --- a/crates/uv/Cargo.toml +++ b/crates/uv/Cargo.toml @@ -24,6 +24,7 @@ platform-tags = { workspace = true } pypi-types = { workspace = true } uv-auth = { workspace = true } uv-cache = { workspace = true } +uv-cache-info = { workspace = true } uv-cli = { workspace = true } uv-client = { workspace = true } uv-configuration = { workspace = true } diff --git a/crates/uv/src/commands/tool/install.rs b/crates/uv/src/commands/tool/install.rs index eef29cea3d0f..cba65fc909fc 100644 --- a/crates/uv/src/commands/tool/install.rs +++ b/crates/uv/src/commands/tool/install.rs @@ -8,7 +8,8 @@ use pep440_rs::{VersionSpecifier, VersionSpecifiers}; use pep508_rs::MarkerTree; use pypi_types::{Requirement, RequirementSource}; use tracing::debug; -use uv_cache::{Cache, Refresh, Timestamp}; +use uv_cache::{Cache, Refresh}; +use uv_cache_info::Timestamp; use uv_client::{BaseClientBuilder, Connectivity}; use uv_configuration::{Concurrency, Upgrade}; use uv_normalize::PackageName; diff --git a/crates/uv/src/commands/tool/run.rs b/crates/uv/src/commands/tool/run.rs index cf85e93d0b4b..66008d36f5ba 100644 --- a/crates/uv/src/commands/tool/run.rs +++ b/crates/uv/src/commands/tool/run.rs @@ -14,7 +14,8 @@ use distribution_types::{Name, UnresolvedRequirementSpecification}; use pep440_rs::{VersionSpecifier, VersionSpecifiers}; use pep508_rs::MarkerTree; use pypi_types::{Requirement, RequirementSource}; -use uv_cache::{Cache, Refresh, Timestamp}; +use uv_cache::{Cache, Refresh}; +use uv_cache_info::Timestamp; use uv_cli::ExternalCommand; use uv_client::{BaseClientBuilder, Connectivity}; use uv_configuration::Concurrency; diff --git a/crates/uv/src/lib.rs b/crates/uv/src/lib.rs index df4780c0a9ba..12cf10047236 100644 --- a/crates/uv/src/lib.rs +++ b/crates/uv/src/lib.rs @@ -8,10 +8,10 @@ use anyhow::Result; use clap::error::{ContextKind, ContextValue}; use 
clap::{CommandFactory, Parser}; use owo_colors::OwoColorize; -use tracing::{debug, instrument}; - use settings::PipTreeSettings; -use uv_cache::{Cache, Refresh, Timestamp}; +use tracing::{debug, instrument}; +use uv_cache::{Cache, Refresh}; +use uv_cache_info::Timestamp; use uv_cli::{ compat::CompatArgs, CacheCommand, CacheNamespace, Cli, Commands, PipCommand, PipNamespace, ProjectCommand, diff --git a/crates/uv/tests/pip_install.rs b/crates/uv/tests/pip_install.rs index 43a5607b14e1..6015bcd7331f 100644 --- a/crates/uv/tests/pip_install.rs +++ b/crates/uv/tests/pip_install.rs @@ -2926,7 +2926,7 @@ requires-python = ">=3.8" "### ); - // Re-installing should be a no-op. + // Installing again should be a no-op. uv_snapshot!(context.filters(), context.pip_install() .arg("--editable") .arg(editable_dir.path()), @r###" @@ -2951,7 +2951,7 @@ requires-python = ">=3.8" "#, )?; - // Re-installing should update the package. + // Installing again should update the package. uv_snapshot!(context.filters(), context.pip_install() .arg("--editable") .arg(editable_dir.path()), @r###" @@ -3015,7 +3015,7 @@ dependencies = {file = ["requirements.txt"]} "### ); - // Re-installing should not re-install, as we don't special-case dynamic metadata. + // Installing again should not re-install, as we don't special-case dynamic metadata. uv_snapshot!(context.filters(), context.pip_install() .arg("--editable") .arg(editable_dir.path()), @r###" @@ -3068,7 +3068,7 @@ requires-python = ">=3.8" "### ); - // Re-installing should be a no-op. + // Installing again should be a no-op. uv_snapshot!(context.filters(), context.pip_install() .arg("example @ .") .current_dir(editable_dir.path()), @r###" @@ -3093,7 +3093,7 @@ requires-python = ">=3.8" "#, )?; - // Re-installing should update the package. + // Installing again should update the package. 
uv_snapshot!(context.filters(), context.pip_install() .arg("example @ .") .current_dir(editable_dir.path()), @r###" @@ -3115,6 +3115,228 @@ requires-python = ">=3.8" Ok(()) } +#[test] +fn invalidate_path_on_cache_key() -> Result<()> { + let context = TestContext::new("3.12"); + + // Create a local package. + let editable_dir = context.temp_dir.child("editable"); + editable_dir.create_dir_all()?; + let pyproject_toml = editable_dir.child("pyproject.toml"); + pyproject_toml.write_str( + r#"[project] + name = "example" + version = "0.0.0" + dependencies = ["anyio==4.0.0"] + requires-python = ">=3.8" + + [tool.uv] + cache-keys = ["constraints.txt", { file = "requirements.txt" }] +"#, + )?; + + let requirements_txt = editable_dir.child("requirements.txt"); + requirements_txt.write_str("idna")?; + + let constraints_txt = editable_dir.child("constraints.txt"); + constraints_txt.write_str("idna<3.4")?; + + uv_snapshot!(context.filters(), context.pip_install() + .arg("example @ .") + .current_dir(editable_dir.path()), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 4 packages in [TIME] + Prepared 4 packages in [TIME] + Installed 4 packages in [TIME] + + anyio==4.0.0 + + example==0.0.0 (from file://[TEMP_DIR]/editable) + + idna==3.6 + + sniffio==1.3.1 + "### + ); + + // Installing again should be a no-op. + uv_snapshot!(context.filters(), context.pip_install() + .arg("example @ .") + .current_dir(editable_dir.path()), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Audited 1 package in [TIME] + "### + ); + + // Modify the constraints file. + constraints_txt.write_str("idna<3.5")?; + + // Installing again should update the package. 
+ uv_snapshot!(context.filters(), context.pip_install() + .arg("example @ .") + .current_dir(editable_dir.path()), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 4 packages in [TIME] + Prepared 1 package in [TIME] + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + ~ example==0.0.0 (from file://[TEMP_DIR]/editable) + "### + ); + + // Modify the requirements file. + requirements_txt.write_str("flask")?; + + // Installing again should update the package. + uv_snapshot!(context.filters(), context.pip_install() + .arg("example @ .") + .current_dir(editable_dir.path()), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 4 packages in [TIME] + Prepared 1 package in [TIME] + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + ~ example==0.0.0 (from file://[TEMP_DIR]/editable) + "### + ); + + // Modify the `pyproject.toml` file (but not in a meaningful way). + pyproject_toml.write_str( + r#"[project] + name = "example" + version = "0.0.0" + dependencies = ["anyio==4.0.0"] + requires-python = ">=3.8" + + [tool.uv] + cache-keys = [{ file = "requirements.txt" }, "constraints.txt"] +"#, + )?; + + // Installing again should be a no-op, since `pyproject.toml` was not included as a cache key. + uv_snapshot!(context.filters(), context.pip_install() + .arg("example @ .") + .current_dir(editable_dir.path()), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Audited 1 package in [TIME] + "### + ); + + Ok(()) +} + +#[test] +fn invalidate_path_on_commit() -> Result<()> { + let context = TestContext::new("3.12"); + + // Create a local package. 
+ let editable_dir = context.temp_dir.child("editable"); + editable_dir.create_dir_all()?; + + let pyproject_toml = editable_dir.child("pyproject.toml"); + pyproject_toml.write_str( + r#" + [project] + name = "example" + version = "0.0.0" + dependencies = ["anyio==4.0.0"] + requires-python = ">=3.8" + + [tool.uv] + cache-keys = [{ git = true }] + "#, + )?; + + // Create a Git repository. + context + .temp_dir + .child(".git") + .child("HEAD") + .write_str("ref: refs/heads/main")?; + context + .temp_dir + .child(".git") + .child("refs") + .child("heads") + .child("main") + .write_str("1b6638fdb424e993d8354e75c55a3e524050c857")?; + + uv_snapshot!(context.filters(), context.pip_install() + .arg("example @ .") + .current_dir(editable_dir.path()), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 4 packages in [TIME] + Prepared 4 packages in [TIME] + Installed 4 packages in [TIME] + + anyio==4.0.0 + + example==0.0.0 (from file://[TEMP_DIR]/editable) + + idna==3.6 + + sniffio==1.3.1 + "### + ); + + // Installing again should be a no-op. + uv_snapshot!(context.filters(), context.pip_install() + .arg("example @ .") + .current_dir(editable_dir.path()), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Audited 1 package in [TIME] + "### + ); + + // Change the current commit. + context + .temp_dir + .child(".git") + .child("refs") + .child("heads") + .child("main") + .write_str("a1a42cbd10d83bafd8600ba81f72bbef6c579385")?; + + // Installing again should update the package. 
+ uv_snapshot!(context.filters(), context.pip_install() + .arg("example @ .") + .current_dir(editable_dir.path()), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 4 packages in [TIME] + Prepared 1 package in [TIME] + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + ~ example==0.0.0 (from file://[TEMP_DIR]/editable) + "### + ); + + Ok(()) +} + /// Install from a direct path (wheel) with changed versions in the file name. #[test] fn path_name_version_change() { diff --git a/crates/uv/tests/pip_sync.rs b/crates/uv/tests/pip_sync.rs index eceb18943855..791b0f2b1cf1 100644 --- a/crates/uv/tests/pip_sync.rs +++ b/crates/uv/tests/pip_sync.rs @@ -3215,7 +3215,7 @@ requires-python = ">=3.8" "### ); - // Re-installing should be a no-op. + // Installing again should be a no-op. uv_snapshot!(context.filters(), context.pip_sync() .arg("requirements.in"), @r###" success: true diff --git a/docs/concepts/cache.md b/docs/concepts/cache.md index 3297633d6ff7..fa45fe1976f0 100644 --- a/docs/concepts/cache.md +++ b/docs/concepts/cache.md @@ -28,12 +28,33 @@ If you're running into caching issues, uv includes a few escape hatches: ## Dynamic metadata -Note that for local directory dependencies in particular (e.g., editables), uv will _only_ reinstall -the package if its `pyproject.toml`, `setup.py`, or `setup.cfg` file has changed. This is a +By default, uv will _only_ rebuild and reinstall local directory dependencies (e.g., editables) if +the `pyproject.toml`, `setup.py`, or `setup.cfg` file in the directory root has changed. This is a heuristic and, in some cases, may lead to fewer re-installs than desired. 
-For example, if a local dependency uses `dynamic` metadata, you can instruct uv to _always_ -reinstall the package by adding `reinstall-package` to the `uv` section of your `pyproject.toml`: +To incorporate other information into the cache key for a given package, you can add cache key +entries under `tool.uv.cache-keys`, which can include both file paths and Git commit hashes. + +For example, if a project uses [`setuptools-scm`](https://pypi.org/project/setuptools-scm/), and +should be rebuilt whenever the commit hash changes, you can add the following to the project's +`pyproject.toml`: + +```toml title="pyproject.toml" +[tool.uv] +cache-keys = [{ git = true }] +``` + +Similarly, if a project reads from a `requirements.txt` to populate its dependencies, you can add +the following to the project's `pyproject.toml`: + +```toml title="pyproject.toml" +[tool.uv] +cache-keys = [{ file = "requirements.txt" }] +``` + +As an escape hatch, if a project uses `dynamic` metadata that isn't covered by `tool.uv.cache-keys`, +you can instruct uv to _always_ rebuild and reinstall it by adding the project to the +`tool.uv.reinstall-package` list: ```toml title="pyproject.toml" [tool.uv] diff --git a/docs/reference/settings.md b/docs/reference/settings.md index 0d24a32b07e8..baea1c771d94 100644 --- a/docs/reference/settings.md +++ b/docs/reference/settings.md @@ -59,6 +59,52 @@ Linux, and `%LOCALAPPDATA%\uv\cache` on Windows. --- +#### [`cache-keys`](#cache-keys) {: #cache-keys } + +The keys to consider when caching builds for the project. + +Cache keys enable you to specify the files or directories that should trigger a rebuild when +modified.
By default, uv will rebuild a project whenever the `pyproject.toml`, `setup.py`, +or `setup.cfg` files in the project directory are modified, i.e.: + +```toml +cache-keys = [{ file = "pyproject.toml" }, { file = "setup.py" }, { file = "setup.cfg" }] +``` + +As an example: if a project uses dynamic metadata to read its dependencies from a +`requirements.txt` file, you can specify `cache-keys = [{ file = "requirements.txt" }, { file = "pyproject.toml" }]` +to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in +addition to watching the `pyproject.toml`). + +Cache keys can also include version control information. For example, if a project uses +`setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = "pyproject.toml" }]` +to include the current Git commit hash in the cache key (in addition to the +`pyproject.toml`). + +Cache keys only affect the project defined by the `pyproject.toml` in which they're +specified (as opposed to, e.g., affecting all members in a workspace). + +**Default value**: `[{ file = "pyproject.toml" }, { file = "setup.py" }, { file = "setup.cfg" }]` + +**Type**: `list[dict]` + +**Example usage**: + +=== "pyproject.toml" + + ```toml + [tool.uv] + cache-keys = [{ file = "pyproject.toml" }, { file = "requirements.txt" }, { git = true }] + ``` +=== "uv.toml" + + ```toml + + cache-keys = [{ file = "pyproject.toml" }, { file = "requirements.txt" }, { git = true }] + ``` + +--- + #### [`compile-bytecode`](#compile-bytecode) {: #compile-bytecode } Compile Python files to bytecode after installation. diff --git a/uv.schema.json b/uv.schema.json index 2047d19346e9..7eb08ffce99a 100644 --- a/uv.schema.json +++ b/uv.schema.json @@ -21,6 +21,17 @@ "null" ] }, + "cache-keys": { + "description": "The keys to consider when caching builds for the project.\n\nCache keys enable you to specify the files or directories that should trigger a rebuild when modified. 
By default, uv will rebuild a project whenever the `pyproject.toml`, `setup.py`, or `setup.cfg` files in the project directory are modified, i.e.:\n\n```toml cache-keys = [{ file = \"pyproject.toml\" }, { file = \"setup.py\" }, { file = \"setup.cfg\" }] ```\n\nAs an example: if a project uses dynamic metadata to read its dependencies from a `requirements.txt` file, you can specify `cache-keys = [{ file = \"requirements.txt\" }, { file = \"pyproject.toml\" }]` to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in addition to watching the `pyproject.toml`).\n\nCache keys can also include version control information. For example, if a project uses `setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = \"pyproject.toml\" }]` to include the current Git commit hash in the cache key (in addition to the `pyproject.toml`).\n\nCache keys only affect the project defined by the `pyproject.toml` in which they're specified (as opposed to, e.g., affecting all members in a workspace).", + "writeOnly": true, + "type": [ + "array", + "null" + ], + "items": { + "$ref": "#/definitions/CacheKey" + } + }, "compile-bytecode": { "description": "Compile Python files to bytecode after installation.\n\nBy default, uv does not compile Python (`.py`) files to bytecode (`__pycache__/*.pyc`); instead, compilation is performed lazily the first time a module is imported. For use-cases in which start time is critical, such as CLI applications and Docker containers, this option can be enabled to trade longer installation times for faster start times.\n\nWhen enabled, uv will process the entire site-packages directory (including packages that are not being modified by the current operation) for consistency. 
Like pip, it will also ignore errors.", "type": [ @@ -418,6 +429,40 @@ } ] }, + "CacheKey": { + "anyOf": [ + { + "description": "Ex) `\"Cargo.lock\"`", + "type": "string" + }, + { + "description": "Ex) `{ file = \"Cargo.lock\" }`", + "type": "object", + "required": [ + "file" + ], + "properties": { + "file": { + "type": "string" + } + }, + "additionalProperties": false + }, + { + "description": "Ex) `{ git = true }`", + "type": "object", + "required": [ + "git" + ], + "properties": { + "git": { + "type": "boolean" + } + }, + "additionalProperties": false + } + ] + }, "ConfigSettingValue": { "oneOf": [ {