Compute superset of existing and required hashes when healing cache #8955

Merged · 1 commit · Nov 8, 2024
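This PR changes how uv re-validates archives when healing the cache (re-extracting a source distribution whose cached archive has gone missing). Previously, the healed archive was hashed with only the newly requested algorithms, and the result had to equal the digests recorded in the cached revision, which could fail spuriously when the requested algorithms differed from the recorded ones. Now the archive is hashed with the superset (union) of the requested and previously recorded algorithms, and healing fails only if a previously recorded digest cannot be reproduced; the `CacheHeal` error also names the algorithm that failed.

A minimal standalone sketch of the superset computation, using a simplified stand-in for uv's `HashAlgorithm` (illustrative only; the real logic lives in `crates/uv-distribution/src/source/mod.rs` below):

```rust
/// Simplified stand-in for `uv_pypi_types::HashAlgorithm` (illustrative only).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum HashAlgorithm {
    Md5,
    Sha256,
    Sha512,
}

/// Union of the requested algorithms and those recorded in the cached
/// revision: concatenate, sort, and dedup, as in this PR.
fn superset(requested: &[HashAlgorithm], existing: &[HashAlgorithm]) -> Vec<HashAlgorithm> {
    let mut algorithms = requested.to_vec();
    algorithms.extend_from_slice(existing);
    algorithms.sort();
    algorithms.dedup();
    algorithms
}

fn main() {
    // The cache previously recorded a sha256 digest, but the user now
    // requests sha512: hashing with the union keeps both checkable.
    let union = superset(&[HashAlgorithm::Sha512], &[HashAlgorithm::Sha256]);
    assert_eq!(union, vec![HashAlgorithm::Sha256, HashAlgorithm::Sha512]);
}
```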
6 changes: 3 additions & 3 deletions crates/uv-distribution/src/error.rs
@@ -11,7 +11,7 @@ use uv_distribution_filename::WheelFilenameError;
 use uv_fs::Simplified;
 use uv_normalize::PackageName;
 use uv_pep440::{Version, VersionSpecifiers};
-use uv_pypi_types::{HashDigest, ParsedUrlError};
+use uv_pypi_types::{HashAlgorithm, HashDigest, ParsedUrlError};

 #[derive(Debug, thiserror::Error)]
 pub enum Error {
@@ -94,8 +94,8 @@ pub enum Error {
     MetadataLowering(#[from] MetadataError),
     #[error("Distribution not found at: {0}")]
     NotFound(Url),
-    #[error("Attempted to re-extract the source distribution for `{0}`, but the hashes didn't match. Run `{}` to clear the cache.", "uv cache clean".green())]
-    CacheHeal(String),
+    #[error("Attempted to re-extract the source distribution for `{0}`, but the {1} hash didn't match. Run `{}` to clear the cache.", "uv cache clean".green())]
+    CacheHeal(String, HashAlgorithm),
     #[error("The source distribution requires Python {0}, but {1} is installed")]
     RequiresPython(VersionSpecifiers, Version),
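The `CacheHeal` variant gains a second field so the message can name the specific hash that failed to reproduce. A minimal sketch of the same two-field `thiserror` pattern (simplified: the real variant also colors the `uv cache clean` suggestion, and uv's `HashAlgorithm` lives in `uv_pypi_types`; the stand-in type here is illustrative only):

```rust
use std::fmt;

use thiserror::Error;

/// Illustrative stand-in for `uv_pypi_types::HashAlgorithm`.
#[derive(Debug)]
enum HashAlgorithm {
    Sha256,
}

impl fmt::Display for HashAlgorithm {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Sha256 => write!(f, "sha256"),
        }
    }
}

#[derive(Debug, Error)]
enum Error {
    #[error("Attempted to re-extract the source distribution for `{0}`, but the {1} hash didn't match. Run `uv cache clean` to clear the cache.")]
    CacheHeal(String, HashAlgorithm),
}

fn main() {
    let err = Error::CacheHeal("example".to_string(), HashAlgorithm::Sha256);
    println!("{err}");
}
```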
69 changes: 52 additions & 17 deletions crates/uv-distribution/src/source/mod.rs
@@ -36,7 +36,7 @@ use uv_fs::{rename_with_retry, write_atomic, LockedFile};
 use uv_metadata::read_archive_metadata;
 use uv_pep440::release_specifiers_to_ranges;
 use uv_platform_tags::Tags;
-use uv_pypi_types::{HashDigest, Metadata12, RequiresTxt, ResolutionMetadata};
+use uv_pypi_types::{HashAlgorithm, HashDigest, Metadata12, RequiresTxt, ResolutionMetadata};
 use uv_types::{BuildContext, SourceBuildTrait};
 use zip::ZipArchive;
@@ -641,8 +641,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             // Download the source distribution.
             debug!("Downloading source distribution: {source}");
             let entry = cache_shard.shard(revision.id()).entry(SOURCE);
+            let algorithms = hashes.algorithms();
             let hashes = self
-                .download_archive(response, source, ext, entry.path(), hashes)
+                .download_archive(response, source, ext, entry.path(), &algorithms)
                 .await?;

             Ok(revision.with_hashes(hashes))
@@ -928,8 +929,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         // Unzip the archive to a temporary directory.
         debug!("Unpacking source distribution: {source}");
         let entry = cache_shard.shard(revision.id()).entry(SOURCE);
+        let algorithms = hashes.algorithms();
         let hashes = self
-            .persist_archive(&resource.path, resource.ext, entry.path(), hashes)
+            .persist_archive(&resource.path, resource.ext, entry.path(), &algorithms)
             .await?;

         // Include the hashes and cache info in the revision.
@@ -1525,11 +1527,25 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         hashes: HashPolicy<'_>,
     ) -> Result<Revision, Error> {
         warn!("Re-extracting missing source distribution: {source}");
+
+        // Take the union of the requested and existing hash algorithms.
+        let algorithms = {
+            let mut algorithms = hashes.algorithms();
+            for digest in revision.hashes() {
+                algorithms.push(digest.algorithm());
+            }
+            algorithms.sort();
+            algorithms.dedup();
+            algorithms
+        };
+
         let hashes = self
-            .persist_archive(&resource.path, resource.ext, entry.path(), hashes)
+            .persist_archive(&resource.path, resource.ext, entry.path(), &algorithms)
             .await?;
-        if hashes != revision.hashes() {
-            return Err(Error::CacheHeal(source.to_string()));
+        for existing in revision.hashes() {
+            if !hashes.contains(existing) {
+                return Err(Error::CacheHeal(source.to_string(), existing.algorithm()));
+            }
         }
         Ok(revision.with_hashes(hashes))
     }
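Because the healed archive is now hashed with a superset of the recorded algorithms, strict set equality is the wrong check: extra digests from newly requested algorithms are expected. The loop above instead verifies that every previously recorded digest was reproduced, and the same pattern is repeated on the download path in the next hunk. A sketch of the containment check in isolation (hypothetical `Digest` stand-in for `uv_pypi_types::HashDigest`):

```rust
/// Hypothetical stand-in for `uv_pypi_types::HashDigest`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Digest {
    algorithm: &'static str,
    value: &'static str,
}

/// Returns the first previously recorded digest that the freshly computed
/// set fails to reproduce, mirroring the healing check in this PR.
fn first_mismatch<'a>(existing: &'a [Digest], computed: &[Digest]) -> Option<&'a Digest> {
    existing.iter().find(|&digest| !computed.contains(digest))
}

fn main() {
    let existing = [Digest { algorithm: "sha256", value: "abc" }];
    let computed = [
        Digest { algorithm: "sha256", value: "abc" },
        // The extra sha512 digest is fine; it was newly requested.
        Digest { algorithm: "sha512", value: "def" },
    ];
    assert!(first_mismatch(&existing, &computed).is_none());
}
```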
@@ -1549,11 +1565,24 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         let cache_entry = entry.shard().entry(HTTP_REVISION);
         let download = |response| {
             async {
+                // Take the union of the requested and existing hash algorithms.
+                let algorithms = {
+                    let mut algorithms = hashes.algorithms();
+                    for digest in revision.hashes() {
+                        algorithms.push(digest.algorithm());
+                    }
+                    algorithms.sort();
+                    algorithms.dedup();
+                    algorithms
+                };
+
                 let hashes = self
-                    .download_archive(response, source, ext, entry.path(), hashes)
+                    .download_archive(response, source, ext, entry.path(), &algorithms)
                     .await?;
-                if hashes != revision.hashes() {
-                    return Err(Error::CacheHeal(source.to_string()));
+                for existing in revision.hashes() {
+                    if !hashes.contains(existing) {
+                        return Err(Error::CacheHeal(source.to_string(), existing.algorithm()));
+                    }
                 }
                 Ok(revision.with_hashes(hashes))
             }
@@ -1581,7 +1610,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         source: &BuildableSource<'_>,
         ext: SourceDistExtension,
         target: &Path,
-        hashes: HashPolicy<'_>,
+        algorithms: &[HashAlgorithm],
     ) -> Result<Vec<HashDigest>, Error> {
         let temp_dir = tempfile::tempdir_in(
             self.build_context
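`download_archive` (and `persist_archive` below) now accept an explicit `&[HashAlgorithm]` instead of a `HashPolicy`, so the caller decides exactly which algorithms to hash with; the healing paths pass in the union computed above. Since the function now borrows a slice, the hashers are built with `.iter().copied()` rather than `.into_iter()`, as the next hunk shows. A minimal sketch of that construction, assuming (as the diff implies) that the algorithm type is `Copy` and converts into a hasher via `From` (stand-in types, illustrative only):

```rust
/// Stand-in for `uv_pypi_types::HashAlgorithm`.
#[derive(Debug, Clone, Copy)]
enum HashAlgorithm {
    Sha256,
    Sha512,
}

/// Stand-in for uv's `Hasher`, which wraps one hashing state per algorithm.
#[derive(Debug)]
struct Hasher {
    algorithm: HashAlgorithm,
}

impl From<HashAlgorithm> for Hasher {
    fn from(algorithm: HashAlgorithm) -> Self {
        Self { algorithm }
    }
}

fn main() {
    let algorithms: &[HashAlgorithm] = &[HashAlgorithm::Sha256, HashAlgorithm::Sha512];
    // One hasher per requested algorithm, built from a borrowed slice.
    let hashers = algorithms
        .iter()
        .copied()
        .map(Hasher::from)
        .collect::<Vec<_>>();
    assert_eq!(hashers.len(), 2);
}
```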
@@ -1595,8 +1624,11 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             .into_async_read();

         // Create a hasher for each hash algorithm.
-        let algorithms = hashes.algorithms();
-        let mut hashers = algorithms.into_iter().map(Hasher::from).collect::<Vec<_>>();
+        let mut hashers = algorithms
+            .iter()
+            .copied()
+            .map(Hasher::from)
+            .collect::<Vec<_>>();
         let mut hasher = uv_extract::hash::HashReader::new(reader.compat(), &mut hashers);

         // Download and unzip the source distribution into a temporary directory.
@@ -1605,7 +1637,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         drop(span);

         // If necessary, exhaust the reader to compute the hash.
-        if !hashes.is_none() {
+        if !algorithms.is_empty() {
             hasher.finish().await.map_err(Error::HashExhaustion)?;
         }
@@ -1640,7 +1672,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         path: &Path,
         ext: SourceDistExtension,
         target: &Path,
-        hashes: HashPolicy<'_>,
+        algorithms: &[HashAlgorithm],
     ) -> Result<Vec<HashDigest>, Error> {
         debug!("Unpacking for build: {}", path.display());
@@ -1655,15 +1687,18 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             .map_err(Error::CacheRead)?;

         // Create a hasher for each hash algorithm.
-        let algorithms = hashes.algorithms();
-        let mut hashers = algorithms.into_iter().map(Hasher::from).collect::<Vec<_>>();
+        let mut hashers = algorithms
+            .iter()
+            .copied()
+            .map(Hasher::from)
+            .collect::<Vec<_>>();
         let mut hasher = uv_extract::hash::HashReader::new(reader, &mut hashers);

         // Unzip the archive into a temporary directory.
         uv_extract::stream::archive(&mut hasher, ext, &temp_dir.path()).await?;

         // If necessary, exhaust the reader to compute the hash.
-        if !hashes.is_none() {
+        if !algorithms.is_empty() {
             hasher.finish().await.map_err(Error::HashExhaustion)?;
         }
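With these changes, a failed heal reports which digest could not be reproduced. A hypothetical rendering of the new message, assuming the distribution is `example` and the algorithm displays as `sha256` (illustrative only; the actual output also colors the suggested command):

```
error: Attempted to re-extract the source distribution for `example`, but the sha256 hash didn't match. Run `uv cache clean` to clear the cache.
```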