Skip to content

Commit

Permalink
uv-resolver: make hashes optional
Browse files Browse the repository at this point in the history
This only makes hashes optional for wheels/sdists that come from
registires or direct URLs. For wheels/sdists that come from other
sources, a hash should not be present.

For path dependencies, a hash should not be present because the state of
the path dependency is not intended to be tracked in the lock file. This
is consistent with how other tools deal with path dependencies, and if
it were otherwise, the hash would I believe need to be updated for every
change to the path dependency.

For git dependencies (source dists only), a hash should not be present
because the lock will contain the specific commit revision hash. This is
functionally equivalent to a hash, and so a hash is redundant.

As part of this change, we validate the presence or absence of a hash
based on the dependency source. We also add our first regression tests.
  • Loading branch information
BurntSushi committed May 10, 2024
1 parent 76a39c7 commit 616ed53
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 14 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/uv-resolver/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ uv-interpreter = { workspace = true }

once_cell = { version = "1.19.0" }
insta = { version = "1.36.1" }
toml = { workspace = true }

[features]
default = ["pypi"]
Expand Down
155 changes: 141 additions & 14 deletions crates/uv-resolver/src/lock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,23 @@ impl TryFrom<LockWire> for Lock {
));
}
}
// Also check that our sources are consistent with whether we have
// hashes or not.
let requires_hash = dist.id.source.kind.requires_hash();
if let Some(ref sdist) = dist.sourcedist {
if requires_hash != sdist.hash.is_some() {
return Err(LockError::hash(
dist.id.clone(),
"source distribution",
requires_hash,
));
}
}
for wheel in &dist.wheels {
if requires_hash != wheel.hash.is_some() {
return Err(LockError::hash(dist.id.clone(), "wheel", requires_hash));
}
}
}
Ok(Lock {
version: wire.version,
Expand Down Expand Up @@ -491,6 +508,17 @@ impl SourceKind {
SourceKind::Path => "path",
}
}

/// Returns true when this source kind requires a hash.
///
/// When this returns false, it also implies that a hash should
/// _not_ be present.
fn requires_hash(&self) -> bool {
match *self {
SourceKind::Registry | SourceKind::Direct => true,
SourceKind::Git(_) | SourceKind::Path => false,
}
}
}

/// NOTE: Care should be taken when adding variants to this enum. Namely, new
Expand Down Expand Up @@ -547,7 +575,11 @@ pub(crate) struct SourceDist {
/// and/or recording where the source dist file originally came from.
url: Url,
/// A hash of the source distribution.
hash: Hash,
///
/// This is only present for source distributions that come from registries
/// and direct URLs. Source distributions from git or path dependencies do
/// not have hashes associated with them.
hash: Option<Hash>,
}

impl SourceDist {
Expand Down Expand Up @@ -595,7 +627,10 @@ impl SourceDist {
.to_url()
.map_err(LockError::invalid_file_url)?;
let hash = Hash::from(reg_dist.file.hashes[0].clone());
Ok(SourceDist { url, hash })
Ok(SourceDist {
url,
hash: Some(hash),
})
}

fn from_direct_dist(direct_dist: &DirectUrlSourceDist) -> SourceDist {
Expand All @@ -609,16 +644,14 @@ impl SourceDist {
fn from_git_dist(git_dist: &GitSourceDist) -> SourceDist {
SourceDist {
url: git_dist.url.to_url(),
// TODO: We want a hash for the artifact at the URL.
hash: todo!(),
hash: None,
}
}

fn from_path_dist(path_dist: &PathSourceDist) -> SourceDist {
SourceDist {
url: path_dist.url.to_url(),
// TODO: We want a hash for the artifact at the URL.
hash: todo!(),
hash: None,
}
}
}
Expand All @@ -633,7 +666,11 @@ pub(crate) struct Wheel {
/// recording where the wheel file originally came from.
url: Url,
/// A hash of the source distribution.
hash: Hash,
///
/// This is only present for wheels that come from registries and direct
/// URLs. Wheels from git or path dependencies do not have hashes
/// associated with them.
hash: Option<Hash>,
/// The filename of the wheel.
///
/// This isn't part of the wire format since it's redundant with the
Expand Down Expand Up @@ -680,7 +717,7 @@ impl Wheel {
let hash = Hash::from(reg_dist.file.hashes[0].clone());
Ok(Wheel {
url,
hash,
hash: Some(hash),
filename,
})
}
Expand All @@ -697,8 +734,7 @@ impl Wheel {
fn from_path_dist(path_dist: &PathBuiltDist) -> Wheel {
Wheel {
url: path_dist.url.to_url(),
// TODO: We want a hash for the artifact at the URL.
hash: todo!(),
hash: None,
filename: path_dist.filename.clone(),
}
}
Expand All @@ -712,7 +748,11 @@ struct WheelWire {
/// recording where the wheel file originally came from.
url: Url,
/// A hash of the source distribution.
hash: Hash,
///
/// This is only present for wheels that come from registries and direct
/// URLs. Wheels from git or path dependencies do not have hashes
/// associated with them.
hash: Option<Hash>,
}

impl From<Wheel> for WheelWire {
Expand Down Expand Up @@ -854,6 +894,17 @@ impl LockError {
kind: Box::new(kind),
}
}

fn hash(id: DistributionId, artifact_type: &'static str, expected: bool) -> LockError {
let kind = LockErrorKind::Hash {
id,
artifact_type,
expected,
};
LockError {
kind: Box::new(kind),
}
}
}

impl std::error::Error for LockError {
Expand All @@ -863,6 +914,7 @@ impl std::error::Error for LockError {
LockErrorKind::DuplicateDependency { .. } => None,
LockErrorKind::InvalidFileUrl { ref err } => Some(err),
LockErrorKind::UnrecognizedDependency { ref err } => Some(err),
LockErrorKind::Hash { .. } => None,
}
}
}
Expand All @@ -871,15 +923,15 @@ impl std::fmt::Display for LockError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match *self.kind {
LockErrorKind::DuplicateDistribution { ref id } => {
write!(f, "found duplicate distribution {id}")
write!(f, "found duplicate distribution `{id}`")
}
LockErrorKind::DuplicateDependency {
ref id,
ref dependency_id,
} => {
write!(
f,
"for distribution {id}, found duplicate dependency {dependency_id}"
"for distribution `{id}`, found duplicate dependency `{dependency_id}`"
)
}
LockErrorKind::InvalidFileUrl { .. } => {
Expand All @@ -888,6 +940,30 @@ impl std::fmt::Display for LockError {
LockErrorKind::UnrecognizedDependency { .. } => {
write!(f, "found unrecognized dependency")
}
LockErrorKind::Hash {
ref id,
artifact_type,
expected: true,
} => {
write!(
f,
"since the distribution `{id}` comes from a {source} dependency, \
a hash was expected but one was not found for {artifact_type}",
source = id.source.kind.name(),
)
}
LockErrorKind::Hash {
ref id,
artifact_type,
expected: false,
} => {
write!(
f,
"since the distribution `{id}` comes from a {source} dependency, \
a hash was not expected but one was found for {artifact_type}",
source = id.source.kind.name(),
)
}
}
}
}
Expand Down Expand Up @@ -923,6 +999,17 @@ enum LockErrorKind {
/// The actual error.
err: UnrecognizedDependencyError,
},
/// An error that occurs when a hash is expected (or not) for a particular
/// artifact, but one was not found (or was).
Hash {
/// The ID of the distribution that has a missing hash.
id: DistributionId,
/// The specific type of artifact, e.g., "source distribution"
/// or "wheel".
artifact_type: &'static str,
/// When true, a hash is expected to be present.
expected: bool,
},
}

/// An error that occurs when there's an unrecognized dependency.
Expand Down Expand Up @@ -996,7 +1083,7 @@ impl std::fmt::Display for SourceParseError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let given = &self.given;
match self.kind {
SourceParseErrorKind::NoPlus => write!(f, "could not find '+' in source `{given}`"),
SourceParseErrorKind::NoPlus => write!(f, "could not find `+` in source `{given}`"),
SourceParseErrorKind::UnrecognizedSourceName { ref name } => {
write!(f, "unrecognized name `{name}` in source `{given}`")
}
Expand Down Expand Up @@ -1033,3 +1120,43 @@ impl std::fmt::Display for HashParseError {
self.0.fmt(f)
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn hash_required_present() {
let data = r#"
version = 1
[[distribution]]
name = "anyio"
version = "4.3.0"
source = "registry+https://pypi.org/simple"
[[distribution.wheel]]
url = "https://files.pythonhosted.org/packages/14/fd/2f20c40b45e4fb4324834aea24bd4afdf1143390242c0b33774da0e2e34f/anyio-4.3.0-py3-none-any.whl"
"#;
let result: Result<Lock, _> = toml::from_str(data);
insta::assert_debug_snapshot!(result);
}

#[test]
fn hash_required_missing() {
let data = r#"
version = 1
[[distribution]]
name = "anyio"
version = "4.3.0"
source = "path+file:///foo/bar"
[[distribution.wheel]]
url = "file:///foo/bar/anyio-4.3.0-py3-none-any.whl"
hash = "sha256:048e05d0f6caeed70d731f3db756d35dcc1f35747c8c403364a8332c630441b8"
"#;
let result: Result<Lock, _> = toml::from_str(data);
insta::assert_debug_snapshot!(result);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
source: crates/uv-resolver/src/lock.rs
expression: result
---
Err(
Error {
inner: Error {
inner: TomlError {
message: "since the distribution `anyio 4.3.0 path+file:///foo/bar` comes from a path dependency, a hash was not expected but one was found for wheel",
raw: None,
keys: [],
span: None,
},
},
},
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
source: crates/uv-resolver/src/lock.rs
expression: result
---
Err(
Error {
inner: Error {
inner: TomlError {
message: "since the distribution `anyio 4.3.0 registry+https://pypi.org/simple` comes from a registry dependency, a hash was expected but one was not found for wheel",
raw: None,
keys: [],
span: None,
},
},
},
)

0 comments on commit 616ed53

Please sign in to comment.