From f29d9618b3a2591608b0610187750187d8f749db Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Fri, 2 Aug 2024 02:43:40 -0400 Subject: [PATCH 01/13] implement marker trees using algebraic decision diagrams --- Cargo.lock | 24 + Cargo.toml | 1 + crates/pep440-rs/src/version_specifier.rs | 30 + crates/pep508-rs/Cargo.toml | 7 + crates/pep508-rs/src/lib.rs | 99 +- crates/pep508-rs/src/marker/algebra.rs | 835 +++++++ crates/pep508-rs/src/marker/mod.rs | 7 +- crates/pep508-rs/src/marker/parse.rs | 166 +- crates/pep508-rs/src/marker/simplify.rs | 361 +++ crates/pep508-rs/src/marker/tree.rs | 2123 +++++++++++------ crates/pep508-rs/src/unnamed.rs | 4 +- crates/pypi-types/src/requirement.rs | 24 +- ...__line-endings-poetry-with-hashes.txt.snap | 122 +- ...t__test__parse-poetry-with-hashes.txt.snap | 122 +- ...ts_txt__test__parse-unix-editable.txt.snap | 63 +- ...txt__test__parse-windows-editable.txt.snap | 63 +- crates/uv-configuration/src/constraints.rs | 5 +- crates/uv-configuration/src/overrides.rs | 26 +- crates/uv-pubgrub/Cargo.toml | 15 + .../specifier.rs => uv-pubgrub/src/lib.rs} | 10 +- crates/uv-requirements/src/source_tree.rs | 6 +- crates/uv-resolver/Cargo.toml | 1 + crates/uv-resolver/src/error.rs | 9 +- crates/uv-resolver/src/lock.rs | 21 +- crates/uv-resolver/src/marker.rs | 1146 +-------- crates/uv-resolver/src/pubgrub/mod.rs | 3 +- crates/uv-resolver/src/pubgrub/package.rs | 22 +- crates/uv-resolver/src/requires_python.rs | 11 +- crates/uv-resolver/src/resolution/display.rs | 4 +- crates/uv-resolver/src/resolution/graph.rs | 64 +- .../src/resolution/requirements_txt.rs | 20 +- crates/uv-resolver/src/resolver/fork_map.rs | 3 +- crates/uv-resolver/src/resolver/mod.rs | 84 +- .../src/resolver/resolver_markers.rs | 2 +- crates/uv/src/commands/pip/compile.rs | 14 +- crates/uv/tests/lock.rs | 16 +- crates/uv/tests/lock_scenarios.rs | 50 +- crates/uv/tests/pip_compile.rs | 74 +- 38 files changed, 3080 insertions(+), 2577 deletions(-) create mode 100644 crates/pep508-rs/src/marker/algebra.rs create mode 100644 crates/pep508-rs/src/marker/simplify.rs create mode 100644 crates/uv-pubgrub/Cargo.toml rename crates/{uv-resolver/src/pubgrub/specifier.rs => uv-pubgrub/src/lib.rs} (97%) diff --git a/Cargo.lock b/Cargo.lock index 569fcb18cfcf..bb8470d6f1fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -447,6 +447,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "boxcar" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "510a90332002c1af3317ef6b712f0dab697f30bbe809b86965eac2923c0bca8e" + [[package]] name = "brotli" version = "6.0.0" @@ -2456,16 +2462,22 @@ dependencies = [ name = "pep508_rs" version = "0.6.0" dependencies = [ + "boxcar", "derivative", + "indexmap", "insta", + "itertools 0.13.0", "log", "pep440_rs", + "pubgrub", "pyo3", "pyo3-log", "regex", + "rustc-hash 2.0.0", "schemars", "serde", "serde_json", + "smallvec", "testing_logger", "thiserror", "tracing", @@ -2473,6 +2485,7 @@ dependencies = [ "url", "uv-fs", "uv-normalize", + "uv-pubgrub", ] [[package]] @@ -4938,6 +4951,16 @@ dependencies = [ "serde", ] +[[package]] +name = "uv-pubgrub" +version = "0.0.1" +dependencies = [ + "itertools 0.13.0", + "pep440_rs", + "pubgrub", + "thiserror", +] + [[package]] name = "uv-python" version = "0.0.1" @@ -5068,6 +5091,7 @@ dependencies = [ "uv-fs", "uv-git", "uv-normalize", + "uv-pubgrub", "uv-python", "uv-types", "uv-warnings", diff --git a/Cargo.toml b/Cargo.toml index 7bd3237c0958..98339258879b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ uv-options-metadata = { path = "crates/uv-options-metadata" } uv-python = { path = "crates/uv-python" } uv-requirements = { path = "crates/uv-requirements" } uv-resolver = { path = "crates/uv-resolver" } +uv-pubgrub = { path = "crates/uv-pubgrub" } uv-scripts = { path = "crates/uv-scripts" } uv-settings = { path = "crates/uv-settings" } uv-shell = { path = "crates/uv-shell" } diff --git a/crates/pep440-rs/src/version_specifier.rs b/crates/pep440-rs/src/version_specifier.rs index 03bbd2872de2..72fb36830a30 100644 --- a/crates/pep440-rs/src/version_specifier.rs +++ b/crates/pep440-rs/src/version_specifier.rs @@ -405,6 +405,13 @@ impl VersionSpecifier { version, } } + /// `!=` + pub fn not_equals_version(version: Version) -> Self { + Self { + operator: Operator::NotEqual, + version, + } + } /// `>=` pub fn greater_than_equal_version(version: Version) -> Self { @@ -413,6 +420,29 @@ impl VersionSpecifier { version, } } + /// `>` + pub fn greater_than_version(version: Version) -> Self { + Self { + operator: Operator::GreaterThan, + version, + } + } + + /// `<=` + pub fn less_than_equal_version(version: Version) -> Self { + Self { + operator: Operator::LessThanEqual, + version, + } + } + + /// `<` + pub fn less_than_version(version: Version) -> Self { + Self { + operator: Operator::LessThan, + version, + } + } /// Get the operator, e.g. `>=` in `>= 2.0.0` pub fn operator(&self) -> &Operator { diff --git a/crates/pep508-rs/Cargo.toml b/crates/pep508-rs/Cargo.toml index 44827003ca8d..6d7d7c88a41f 100644 --- a/crates/pep508-rs/Cargo.toml +++ b/crates/pep508-rs/Cargo.toml @@ -21,6 +21,10 @@ workspace = true [dependencies] derivative = { workspace = true } +itertools = { workspace = true } +indexmap = { workspace = true } +pubgrub = { workspace = true } +rustc-hash = { workspace = true } pep440_rs = { workspace = true } pyo3 = { workspace = true, optional = true, features = ["abi3", "extension-module"] } pyo3-log = { workspace = true, optional = true } @@ -34,6 +38,9 @@ unicode-width = { workspace = true } url = { workspace = true, features = ["serde"] } uv-fs = { workspace = true } uv-normalize = { workspace = true } +uv-pubgrub = { workspace = true } +boxcar = "0.2.5" +smallvec = "1.13.2" [dev-dependencies] insta = { version = "1.36.1" } diff --git a/crates/pep508-rs/src/lib.rs b/crates/pep508-rs/src/lib.rs index 0d5b2c5c3f0e..215cfcb2dc03 100644 --- a/crates/pep508-rs/src/lib.rs +++ b/crates/pep508-rs/src/lib.rs @@ -39,9 +39,10 @@ use url::Url; use cursor::Cursor; pub use marker::{ - ExtraOperator, MarkerEnvironment, MarkerEnvironmentBuilder, MarkerExpression, MarkerOperator, - MarkerTree, MarkerValue, MarkerValueString, MarkerValueVersion, MarkerWarningKind, - StringVersion, + ContainsMarkerTree, ExtraMarkerTree, ExtraOperator, InMarkerTree, MarkerEnvironment, + MarkerEnvironmentBuilder, MarkerExpression, MarkerOperator, MarkerTree, MarkerTreeContents, + MarkerTreeKind, MarkerValue, MarkerValueString, MarkerValueVersion, MarkerWarningKind, + StringMarkerTree, StringVersion, VersionMarkerTree, }; pub use origin::RequirementOrigin; #[cfg(feature = "pyo3")] @@ -189,7 +190,7 @@ impl Display for Requirement { } } } - if let Some(marker) = &self.marker { + if let Some(marker) = self.marker.as_ref().and_then(MarkerTree::contents) { write!(f, " ; {marker}")?; } Ok(()) @@ -255,7 +256,10 @@ impl PyRequirement { /// `requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8"` #[getter] pub fn marker(&self) -> Option { - self.marker.as_ref().map(ToString::to_string) + self.marker + .as_ref() + .and_then(MarkerTree::contents) + .map(|marker| marker.to_string()) } /// Parses a PEP 440 string @@ -416,18 +420,20 @@ impl Requirement { #[must_use] pub fn with_extra_marker(self, extra: &ExtraName) -> Self { let marker = match self.marker { - Some(expression) => MarkerTree::And(vec![ - expression, - MarkerTree::Expression(MarkerExpression::Extra { + Some(mut marker) => { + let extra = MarkerTree::expression(MarkerExpression::Extra { operator: ExtraOperator::Equal, name: extra.clone(), - }), - ]), - None => MarkerTree::Expression(MarkerExpression::Extra { + }); + marker.and(extra); + marker + } + None => MarkerTree::expression(MarkerExpression::Extra { operator: ExtraOperator::Equal, name: extra.clone(), }), }; + Self { marker: Some(marker), ..self @@ -1043,7 +1049,7 @@ fn parse_pep508_requirement( let marker = if cursor.peek_char() == Some(';') { // Skip past the semicolon cursor.next(); - Some(marker::parse::parse_markers_cursor(cursor, reporter)?) + marker::parse::parse_markers_cursor(cursor, reporter)? } else { None }; @@ -1123,10 +1129,10 @@ mod tests { use uv_normalize::{ExtraName, InvalidNameError, PackageName}; use crate::cursor::Cursor; - use crate::marker::{ - parse, MarkerExpression, MarkerOperator, MarkerTree, MarkerValueString, MarkerValueVersion, + use crate::marker::{parse, MarkerExpression, MarkerTree, MarkerValueVersion}; + use crate::{ + MarkerOperator, MarkerValueString, Requirement, TracingReporter, VerbatimUrl, VersionOrUrl, }; - use crate::{Requirement, TracingReporter, VerbatimUrl, VersionOrUrl}; fn parse_pep508_err(input: &str) -> String { Requirement::::from_str(input) @@ -1216,7 +1222,7 @@ mod tests { .into_iter() .collect(), )), - marker: Some(MarkerTree::Expression(MarkerExpression::Version { + marker: Some(MarkerTree::expression(MarkerExpression::Version { key: MarkerValueVersion::PythonVersion, specifier: VersionSpecifier::from_pattern( pep440_rs::Operator::LessThan, @@ -1463,37 +1469,38 @@ mod tests { &mut Cursor::new(marker), &mut TracingReporter, ) + .unwrap() .unwrap(); - let expected = MarkerTree::And(vec![ - MarkerTree::Expression(MarkerExpression::Version { - key: MarkerValueVersion::PythonVersion, - specifier: VersionSpecifier::from_pattern( - pep440_rs::Operator::Equal, - "2.7".parse().unwrap(), - ) - .unwrap(), - }), - MarkerTree::Or(vec![ - MarkerTree::Expression(MarkerExpression::String { - key: MarkerValueString::SysPlatform, - operator: MarkerOperator::Equal, - value: "win32".to_string(), - }), - MarkerTree::And(vec![ - MarkerTree::Expression(MarkerExpression::String { - key: MarkerValueString::OsName, - operator: MarkerOperator::Equal, - value: "linux".to_string(), - }), - MarkerTree::Expression(MarkerExpression::String { - key: MarkerValueString::ImplementationName, - operator: MarkerOperator::Equal, - value: "cpython".to_string(), - }), - ]), - ]), - ]); - assert_eq!(expected, actual); + + let mut a = MarkerTree::expression(MarkerExpression::Version { + key: MarkerValueVersion::PythonVersion, + specifier: VersionSpecifier::from_pattern( + pep440_rs::Operator::Equal, + "2.7".parse().unwrap(), + ) + .unwrap(), + }); + let mut b = MarkerTree::expression(MarkerExpression::String { + key: MarkerValueString::SysPlatform, + operator: MarkerOperator::Equal, + value: "win32".to_string(), + }); + let mut c = MarkerTree::expression(MarkerExpression::String { + key: MarkerValueString::OsName, + operator: MarkerOperator::Equal, + value: "linux".to_string(), + }); + let d = MarkerTree::expression(MarkerExpression::String { + key: MarkerValueString::ImplementationName, + operator: MarkerOperator::Equal, + value: "cpython".to_string(), + }); + + c.and(d); + b.or(c); + a.and(b); + + assert_eq!(a, actual); } #[test] diff --git a/crates/pep508-rs/src/marker/algebra.rs b/crates/pep508-rs/src/marker/algebra.rs new file mode 100644 index 000000000000..17f948e272ad --- /dev/null +++ b/crates/pep508-rs/src/marker/algebra.rs @@ -0,0 +1,835 @@ +//! This module implements marker tree operations using Algebraic Decision Diagrams (ADD). +//! +//! An ADD is a tree of decision nodes as well as two terminal nodes, `true` and `false`. Marker +//! variables are represented as decision nodes. The edge from a decision node to it's child +//! represents a particular assignment of a value to that variable. Depending on the type of +//! variable, an edge can be represented by binary values or a disjoint set of ranges, as opposed +//! to a traditional Binary Decision Diagram. +//! +//! For example, the marker `python_version > '3.7' and os_name == 'Linux'` creates the following +//! marker tree: +//! +//! ```text +//! python_version: +//! (> '3.7') -> os_name: +//! (> 'Linux') -> FALSE +//! (== 'Linux') -> TRUE +//! (< 'Linux') -> FALSE +//! (<= '3.7') -> FALSE +//! ``` +//! +//! Specifically, a marker tree is represented as a Reduced Ordered ADD. An ADD is ordered if +//! different variables appear in the same order on all paths from the root. Additionally, an ADD +//! is reduced if: +//! - Isomoprhic nodes are merged. +//! - Nodes with isomorphic children are eliminated. +//! +//! These two rules provide an important guarantee for marker trees: marker trees are canonical for +//! a given marker function and variable ordering. Because variable ordering is defined at compile-time, +//! this means any functionally equivalent marker trees are normalized upon construction. Importantly, +//! this means that we can identify trivially true marker trees, as well as unsatisfiable marker trees. +//! This provides important information to the resolver when forking. +//! +//! ADDs provide polynomial time operations such as conjunction and negation, which is important as marker +//! trees are combined during universal resolution. Because ADDs solve the SAT problem, constructing an +//! arbitrary ADD can theoretically take exponential time in the worst case. However, in practice, marker trees +//! have a limited number of variables and user-provided marker trees are typically very simple. +//! +//! Additionally, the implementation in this module uses complemented edges, meaning a marker tree and +//! it's complement are represented by the same node internally. This allows cheap constant-time marker +//! tree negation. +use std::cmp::Ordering; +use std::fmt; +use std::ops::Bound; +use std::sync::Mutex; +use std::sync::MutexGuard; + +use itertools::Either; +use pep440_rs::{Version, VersionSpecifier}; +use pubgrub::Range; +use rustc_hash::FxHashMap; +use std::sync::LazyLock; +use uv_normalize::ExtraName; +use uv_pubgrub::PubGrubSpecifier; + +use crate::ExtraOperator; +use crate::{MarkerExpression, MarkerOperator, MarkerValueString, MarkerValueVersion}; + +/// The global node interner. +pub(crate) static INTERNER: LazyLock = LazyLock::new(Interner::default); + +/// An interner for decision nodes. +/// +/// Interning decision nodes allows isomoprhic nodes to be automatically merged. +/// It also allows nodes to cheaply compared. +#[derive(Default)] +pub(crate) struct Interner { + pub shared: InternerShared, + state: Mutex, +} + +/// The shared part of an [`Interner`], which can be accessed without a lock. +#[derive(Default)] +pub(crate) struct InternerShared { + /// A list of unique [`Node`]s. + nodes: boxcar::Vec, +} + +/// The mutable [`Interner`] state, stored behind a lock. +#[derive(Default)] +struct InternerState { + /// A map from a [`Node`] to a unique [`NodeId`], representing an index + /// into [`InternerShared`]. + unique: FxHashMap, + + /// A cache for operations between two nodes. + cache: FxHashMap<(NodeId, NodeId), NodeId>, +} + +impl InternerShared { + /// Returns the node for the given [`NodeId`]. + pub(crate) fn node(&self, id: NodeId) -> &Node { + &self.nodes[id.index()] + } +} + +impl Interner { + /// Locks the interner state, returning a guard that can be used to perform marker + /// operations. + pub(crate) fn lock(&self) -> InternerGuard<'_> { + InternerGuard { + state: self.state.lock().unwrap(), + shared: &self.shared, + } + } +} + +/// A lock of [`InternerState`]. +pub(crate) struct InternerGuard<'a> { + state: MutexGuard<'a, InternerState>, + shared: &'a InternerShared, +} + +impl InternerGuard<'_> { + /// Creates a decision node with the given variable and children. + fn create_node(&mut self, var: Variable, children: Edges) -> NodeId { + let mut node = Node { var, children }; + let mut first = node.children.nodes().next().unwrap(); + + // With a complemented edge representation, there are two ways to represent the same node: + // complementing the root and all children edges results in the same node. To ensure markers + // are canonical, the first child edge is never complemented. + let mut flipped = false; + if first.is_complement() { + node = node.not(); + first = first.not(); + flipped = true; + } + + // Reduction: If all children refer to the same node, we eliminate the parent node + // and just return the child. + if node.children.nodes().all(|node| node == first) { + return if flipped { first.not() } else { first }; + } + + // Insert the node. + let id = self + .state + .unique + .entry(node.clone()) + .or_insert_with(|| NodeId::new(self.shared.nodes.push(node), false)); + + if flipped { + id.not() + } else { + *id + } + } + + /// Returns a decision node for a single marker expression. + pub(crate) fn expression(&mut self, expr: MarkerExpression) -> NodeId { + let (var, children) = match expr { + // A variable representing the output of a version key. Edges correspond + // to disjoint version ranges. + MarkerExpression::Version { key, specifier } => { + (Variable::Version(key), Edges::from_specifier(&specifier)) + } + // The `in` and `contains` operators are a bit different than other operators. + // In particular, they do not represent a particular value for the corresponding + // variable, and can overlap. For example, `'nux' in os_name` and `os_name == 'Linux'` + // can both be `true` in the same marker environment, and so cannot be represented by + // the same variable. Because of this, we represent `in` and `contains`, as well as + // their negations, as distinct variables, unrelated to the range of a given key. + // + // Note that in the presence of the `in` operator, we may not be able to simplify + // some marker trees to a constant `true` or `false`. For example, it is not trivial to + // detect that `os_name < 'z' and os_name in 'Linux'` is unsatisfiable. + MarkerExpression::String { + key, + operator: MarkerOperator::In, + value, + } => (Variable::In { key, value }, Edges::from_bool(true)), + MarkerExpression::String { + key, + operator: MarkerOperator::NotIn, + value, + } => (Variable::In { key, value }, Edges::from_bool(false)), + MarkerExpression::String { + key, + operator: MarkerOperator::Contains, + value, + } => (Variable::Contains { key, value }, Edges::from_bool(true)), + MarkerExpression::String { + key, + operator: MarkerOperator::NotContains, + value, + } => (Variable::Contains { key, value }, Edges::from_bool(false)), + // A variable representing the output of a string key. Edges correspond + // to disjoint string ranges. + MarkerExpression::String { + key, + operator, + value, + } => (Variable::String(key), Edges::from_string(operator, value)), + // A variable representing the existence or absence of a particular extra. + MarkerExpression::Extra { + name, + operator: ExtraOperator::Equal, + } => (Variable::Extra(name), Edges::from_bool(true)), + MarkerExpression::Extra { + name, + operator: ExtraOperator::NotEqual, + } => (Variable::Extra(name), Edges::from_bool(false)), + }; + + self.create_node(var, children) + } + + // Returns a decision node representing the disjunction of two nodes. + pub(crate) fn or(&mut self, x: NodeId, y: NodeId) -> NodeId { + // We take advantage of cheap negation here and implement OR in terms + // of it's DeMorgan complement. + self.and(x.not(), y.not()).not() + } + + // Returns a decision node representing the conjunction of two nodes. + pub(crate) fn and(&mut self, xi: NodeId, yi: NodeId) -> NodeId { + if xi == NodeId::TRUE { + return yi; + } + if yi == NodeId::TRUE { + return xi; + } + if xi == yi { + return xi; + } + if xi == NodeId::FALSE || yi == NodeId::FALSE { + return NodeId::FALSE; + } + + // X and Y are not equal but refer to the same node. + // Thus one is complement but not the other (X and not X). + if xi.index() == yi.index() { + return NodeId::FALSE; + } + + // The operation was memoized. + if let Some(result) = self.state.cache.get(&(xi, yi)) { + return *result; + } + + let (x, y) = (self.shared.node(xi), self.shared.node(yi)); + + // Perform Shannon Expansion of the higher order variable. + let (func, children) = match x.var.cmp(&y.var) { + // X is higher order than Y, apply Y to every child of X. + Ordering::Less => { + let children = x.children.map(xi, |node| self.and(node, yi)); + (x.var.clone(), children) + } + // Y is higher order than X, apply X to every child of Y. + Ordering::Greater => { + let children = y.children.map(yi, |node| self.and(node, xi)); + (y.var.clone(), children) + } + // X and Y represent the same variable, merge their children. + Ordering::Equal => { + let children = x.children.apply(xi, &y.children, yi, |x, y| self.and(x, y)); + (x.var.clone(), children) + } + }; + + // Create the output node. + let node = self.create_node(func, children); + + // Memoize the result of this operation. + // + // ADDs often contain duplicated subgraphs in distinct branches due to the restricted + // variable ordering. Memoizing allows ADD operations to remain polynomial time. + self.state.cache.insert((xi, yi), node); + + node + } + + // Restrict the output of a given boolean variable in the tree. + // + // This allows a tree to be simplified if a variable is known to be `true`. + pub(crate) fn restrict(&mut self, i: NodeId, f: &impl Fn(&Variable) -> Option) -> NodeId { + if matches!(i, NodeId::TRUE | NodeId::FALSE) { + return i; + } + + let node = self.shared.node(i); + if let Edges::Boolean { high, low } = node.children { + if let Some(value) = f(&node.var) { + // Restrict this variable to the given output by merging it + // with the relevant child. + let node = if value { high } else { low }; + return node.negate(i); + } + } + + // Restrict all nodes recursively. + let children = node.children.map(i, |node| self.restrict(node, f)); + self.create_node(node.var.clone(), children) + } + + // Restrict the output of a given version variable in the tree. + // + // This allows the tree to be simplified if a variable is known to be restricted to a + // particular range of outputs. + pub(crate) fn restrict_versions( + &mut self, + i: NodeId, + f: &impl Fn(&Variable) -> Option>, + ) -> NodeId { + if matches!(i, NodeId::TRUE | NodeId::FALSE) { + return i; + } + + let node = self.shared.node(i); + if let Edges::Version { edges: ref map } = node.children { + if let Some(allowed) = f(&node.var) { + // Restrict the output of this variable to the given range. + let mut simplified = SmallVec::new(); + for (range, node) in map { + let restricted = range.intersection(&allowed); + if restricted.is_empty() { + continue; + } + + simplified.push((restricted.clone(), *node)); + } + + return self + .create_node(node.var.clone(), Edges::Version { edges: simplified }) + .negate(i); + } + } + + // Restrict all nodes recursively. + let children = node.children.map(i, |node| self.restrict_versions(node, f)); + self.create_node(node.var.clone(), children) + } +} + +/// A unique variable for a decision node. +/// +/// This `enum` also defines the variable ordering for all ADDs. +/// Variable ordering is an interesting property of ADDs. A bad ordering +/// can lead to exponential explosion of the size of an ADD. However, +/// dynamically computing an optimal ordering is NP-complete. +/// +/// We may wish to investigate the effect of this ordering on common marker +/// trees. However, marker trees are typically small, so this may not be high +/// impact. +#[derive(PartialOrd, Ord, PartialEq, Eq, Hash, Clone, Debug)] +pub(crate) enum Variable { + /// A version marker, such as `python_version`. + /// + /// This is the highest order variable as it typically contains the most complex + /// ranges, allowing us to merge ranges at the top-level. + Version(MarkerValueVersion), + /// A string marker, such as `os_name`. + String(MarkerValueString), + /// A variable representing a ` in ` expression for a particular + /// string marker and value. + In { + key: MarkerValueString, + value: String, + }, + /// A variable representing a ` in ` expression for a particular + /// string marker and value. + Contains { + key: MarkerValueString, + value: String, + }, + /// A variable representing the existence or absence of a given extra. + /// + /// We keep extras at the leaves of the tree, so when simplifying extras we can + /// trivially remove the leaves without having to reconstruct the entire tree. + Extra(ExtraName), +} + +/// A decision node in an Algebraic Decision Diagram. +#[derive(PartialEq, Eq, Hash, Clone, Debug)] +pub(crate) struct Node { + /// The variable this node represents. + pub(crate) var: Variable, + /// The children of this node, with edges representing the possible outputs + /// of this variable. + pub(crate) children: Edges, +} + +impl Node { + /// Return the complement of this node, flipping all children IDs. + fn not(self) -> Node { + Node { + var: self.var, + children: self.children.not(), + } + } +} + +/// An ID representing a unique decision node. +/// +/// The lowest bit of the ID is used represent complemented edges. +#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct NodeId(usize); + +impl NodeId { + // The terminal node representing `true`, or a trivially `true` node. + pub(crate) const TRUE: NodeId = NodeId(0); + + // The terminal node representing `false`, or an unsatisifable node. + pub(crate) const FALSE: NodeId = NodeId(1); + + /// Create a new, optionally complemented, [`NodeId`] with the given index. + fn new(index: usize, complement: bool) -> NodeId { + NodeId(((index + 1) << 1) | usize::from(complement)) + } + + /// Returns the index of this ID, ignoring the complemented edge. + fn index(self) -> usize { + (self.0 >> 1) - 1 + } + + /// Returns `true` if this ID represents a complemented edge. + fn is_complement(self) -> bool { + (self.0 & 1) == 1 + } + + /// Returns `true` if this node represents an unsatisfiable node. + pub(crate) fn is_false(self) -> bool { + self == NodeId::FALSE + } + + /// Returns `true` if this node represents a trivially `true` node. + pub(crate) fn is_true(self) -> bool { + self == NodeId::TRUE + } + + /// Returns the complement of this node. + pub(crate) fn not(self) -> NodeId { + NodeId(self.0 ^ 1) + } + + /// Returns the complement of this node, if it's parent is complemented. + /// + /// This method is useful to restore the complemented state of children nodes + /// when traversing the tree. + pub(crate) fn negate(self, parent: NodeId) -> NodeId { + if parent.is_complement() { + self.not() + } else { + self + } + } +} + +/// A [`SmallVec`] with enough elements to hold two constant edges, as well as the +/// ranges in-between. +type SmallVec = smallvec::SmallVec<[T; 5]>; + +/// The edges of a decision node. +#[derive(PartialEq, Eq, Hash, Clone, Debug)] +#[allow(clippy::large_enum_variant)] // Nodes are interned. +pub(crate) enum Edges { + // The edges of a version variable, representing a disjoint set of ranges that cover + // the output space. + // + // Note that all ranges are simple, meaning they can be represented by a bounded interval + // without gaps. + Version { + edges: SmallVec<(Range, NodeId)>, + }, + // The edges of a string variable, representing a disjoint set of ranges that cover + // the output space. + // + // Note that all ranges are simple, meaning they can be represented by a bounded interval + // without gaps. + String { + edges: SmallVec<(Range, NodeId)>, + }, + // The edges of a boolean variable, representing `true` or `false` values. + Boolean { + high: NodeId, + low: NodeId, + }, +} + +impl Edges { + /// Returns the [`Edges`] for a boolean variable. + fn from_bool(complemented: bool) -> Edges { + if complemented { + Edges::Boolean { + high: NodeId::TRUE, + low: NodeId::FALSE, + } + } else { + Edges::Boolean { + high: NodeId::FALSE, + low: NodeId::TRUE, + } + } + } + + /// Returns the [`Edges`] for a string expression. + fn from_string(operator: MarkerOperator, value: String) -> Edges { + let range: Range = match operator { + MarkerOperator::Equal => Range::singleton(value), + MarkerOperator::NotEqual => Range::singleton(value).complement(), + MarkerOperator::GreaterThan => Range::strictly_higher_than(value), + MarkerOperator::GreaterEqual => Range::higher_than(value), + MarkerOperator::LessThan => Range::strictly_lower_than(value), + MarkerOperator::LessEqual => Range::lower_than(value), + MarkerOperator::TildeEqual => unreachable!("string comparisons with ~= are ignored"), + _ => unreachable!("`in` and `contains` are treated as boolean variables"), + }; + + Edges::String { + edges: Edges::from_range(&range), + } + } + + /// Returns the [`Edges`] for a version specifier. + fn from_specifier(specifier: &VersionSpecifier) -> Edges { + // The decision diagram relies on the assumption that the negation of a marker tree + // is the complement of the marker space. However, pre-release versions violate + // this assumption. For example, the marker `python_version > '3.9' or python_version <= '3.9'` + // does not match `python_version == 3.9.0a0`. However, it's negation, + // `python_version > '3.9' and python_version <= '3.9'` also does not include `3.9.0a0`, and is + // actually `false`. + // + // For this reason we ignore pre-release versions completely when evaluating markers. + let specifier = PubGrubSpecifier::from_release_specifier(specifier).unwrap(); + Edges::Version { + edges: Edges::from_range(&specifier.into()), + } + } + + /// Returns an [`Edges`] where values in the given range are `true`. + fn from_range(range: &Range) -> SmallVec<(Range, NodeId)> + where + T: Ord + Clone, + { + let complement = range.complement(); + + let mut edges = SmallVec::new(); + + // Add the `true` edges. + for (start, end) in range.iter() { + let range = Range::from_range_bounds((start.clone(), end.clone())); + edges.push((range, NodeId::TRUE)); + } + + // Add the `false` edges. + for (start, end) in complement.iter() { + let range = Range::from_range_bounds((start.clone(), end.clone())); + edges.push((range, NodeId::FALSE)); + } + + // Sort the ranges. + // + // The ranges are disjoint so we don't care about equality. + edges.sort_by(|(range1, _), (range2, _)| compare_disjoint_range_start(range1, range2)); + edges + } + + /// Merge two [`Edges`], applying the given function to all disjoint, intersecting edges. + fn apply( + &self, + parent: NodeId, + map2: &Edges, + parent2: NodeId, + mut apply: impl FnMut(NodeId, NodeId) -> NodeId, + ) -> Edges { + match (self, map2) { + // Version or string variables, merge the ranges. + (Edges::Version { edges: map }, Edges::Version { edges: map2 }) => Edges::Version { + edges: Edges::apply_ranges(map, parent, map2, parent2, apply), + }, + (Edges::String { edges: map }, Edges::String { edges: map2 }) => Edges::String { + edges: Edges::apply_ranges(map, parent, map2, parent2, apply), + }, + // Boolean variables, simply merge the low and high nodes. + ( + Edges::Boolean { high, low }, + Edges::Boolean { + high: high2, + low: low2, + }, + ) => Edges::Boolean { + high: apply(high.negate(parent), high2.negate(parent)), + low: apply(low.negate(parent), low2.negate(parent)), + }, + _ => unreachable!(), + } + } + + /// Merge two range maps, applying the given function to all disjoint, intersecting ranges. + fn apply_ranges( + map: &SmallVec<(Range, NodeId)>, + parent: NodeId, + map2: &SmallVec<(Range, NodeId)>, + parent2: NodeId, + mut apply: impl FnMut(NodeId, NodeId) -> NodeId, + ) -> SmallVec<(Range, NodeId)> + where + T: Clone + Ord, + { + let mut combined = SmallVec::new(); + for (range, node) in map { + // Split the two maps into a set of disjoint and overlapping ranges, merging the + // intersections. + // + // Note that restrict ranges (see `restrict_versions`) makes finding intersections + // a bit more complicated despite the ranges being sorted. We cannot simply zip both + // sets, as they may contain arbitrary gaps. Instead, we use a quadratic search for + // simplicity as the set of ranges for a given variable is typically very small. + for (range2, node2) in map2 { + let intersection = range2.intersection(range); + if intersection.is_empty() { + // TODO(ibraheem): take advantage of the sorted ranges to `break` early + continue; + } + + // Merge the intersection. + let node = apply(node.negate(parent), node2.negate(parent2)); + match combined.last_mut() { + // Combine ranges if possible. + Some((range, prev)) if *prev == node && can_conjoin(range, &intersection) => { + *range = range.union(&intersection); + } + _ => combined.push((intersection.clone(), node)), + } + } + } + + combined + } + + // Apply the given function to all direct children of this node. + fn map(&self, parent: NodeId, mut f: impl FnMut(NodeId) -> NodeId) -> Edges { + match self { + Edges::Version { edges: map } => Edges::Version { + edges: map + .iter() + .cloned() + .map(|(range, node)| (range, f(node.negate(parent)))) + .collect(), + }, + Edges::String { edges: map } => Edges::String { + edges: map + .iter() + .cloned() + .map(|(range, node)| (range, f(node.negate(parent)))) + .collect(), + }, + Edges::Boolean { high, low } => Edges::Boolean { + low: f(low.negate(parent)), + high: f(high.negate(parent)), + }, + } + } + + // Returns an iterator over all direct children of this node. + fn nodes(&self) -> impl Iterator + '_ { + match self { + Edges::Version { edges: map } => { + Either::Left(Either::Left(map.iter().map(|(_, node)| *node))) + } + Edges::String { edges: map } => { + Either::Left(Either::Right(map.iter().map(|(_, node)| *node))) + } + Edges::Boolean { high, low } => Either::Right([*high, *low].into_iter()), + } + } + + // Returns the complement of this [`Edges`]. + fn not(self) -> Edges { + match self { + Edges::Version { edges: map } => Edges::Version { + edges: map + .into_iter() + .map(|(range, node)| (range, node.not())) + .collect(), + }, + Edges::String { edges: map } => Edges::String { + edges: map + .into_iter() + .map(|(range, node)| (range, node.not())) + .collect(), + }, + Edges::Boolean { high, low } => Edges::Boolean { + high: high.not(), + low: low.not(), + }, + } + } +} + +/// Compares the start of two ranges that are known to be disjoint. +fn compare_disjoint_range_start(range1: &Range, range2: &Range) -> Ordering +where + T: Ord, +{ + let (upper1, _) = range1.bounding_range().unwrap(); + let (upper2, _) = range2.bounding_range().unwrap(); + + match (upper1, upper2) { + (Bound::Unbounded, _) => Ordering::Less, + (_, Bound::Unbounded) => Ordering::Greater, + (Bound::Included(v1), Bound::Excluded(v2)) if v1 == v2 => Ordering::Less, + (Bound::Excluded(v1), Bound::Included(v2)) if v1 == v2 => Ordering::Greater, + // Note that the ranges are disjoint, so their lower bounds cannot be equal. + (Bound::Included(v1) | Bound::Excluded(v1), Bound::Included(v2) | Bound::Excluded(v2)) => { + v1.cmp(v2) + } + } +} + +/// Returns `true` if two disjoint ranges can be conjoined seamlessly without introducing a gap. +fn can_conjoin(range1: &Range, range2: &Range) -> bool +where + T: Ord + Clone, +{ + let Some((_, end)) = range1.bounding_range() else { + return false; + }; + let Some((start, _)) = range2.bounding_range() else { + return false; + }; + + match (end, start) { + (Bound::Included(v1), Bound::Excluded(v2)) if v1 == v2 => true, + (Bound::Excluded(v1), Bound::Included(v2)) if v1 == v2 => true, + _ => false, + } +} + +impl fmt::Debug for NodeId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if *self == NodeId::FALSE { + return write!(f, "false"); + } + + if *self == NodeId::TRUE { + return write!(f, "true"); + } + + if self.is_complement() { + write!(f, "{:?}", INTERNER.shared.node(*self).clone().not()) + } else { + write!(f, "{:?}", INTERNER.shared.node(*self)) + } + } +} + +#[cfg(test)] +mod tests { + use super::{NodeId, INTERNER}; + use crate::MarkerExpression; + + fn expr(s: &str) -> NodeId { + INTERNER + .lock() + .expression(MarkerExpression::from_str(s).unwrap().unwrap()) + } + + #[test] + fn basic() { + let m = || INTERNER.lock(); + let extra_foo = expr("extra == 'foo'"); + assert!(!extra_foo.is_false()); + + let os_foo = expr("os_name == 'foo'"); + let extra_and_os_foo = m().or(extra_foo, os_foo); + assert!(!extra_and_os_foo.is_false()); + assert!(!m().and(extra_foo, os_foo).is_false()); + + let trivially_true = m().or(extra_and_os_foo, extra_and_os_foo.not()); + assert!(!trivially_true.is_false()); + assert!(trivially_true.is_true()); + + let trivially_false = m().and(extra_foo, extra_foo.not()); + assert!(trivially_false.is_false()); + + let e = m().or(trivially_false, os_foo); + assert!(!e.is_false()); + + let extra_not_foo = expr("extra != 'foo'"); + assert!(m().and(extra_foo, extra_not_foo).is_false()); + assert!(m().or(extra_foo, extra_not_foo).is_true()); + + let os_geq_bar = expr("os_name >= 'bar'"); + assert!(!os_geq_bar.is_false()); + + let os_le_bar = expr("os_name < 'bar'"); + assert!(m().and(os_geq_bar, os_le_bar).is_false()); + assert!(m().or(os_geq_bar, os_le_bar).is_true()); + + let os_leq_bar = expr("os_name <= 'bar'"); + assert!(!m().and(os_geq_bar, os_leq_bar).is_false()); + assert!(m().or(os_geq_bar, os_leq_bar).is_true()); + } + + #[test] + fn version() { + let m = || INTERNER.lock(); + let eq_3 = expr("python_version == '3'"); + let neq_3 = expr("python_version != '3'"); + let geq_3 = expr("python_version >= '3'"); + let leq_3 = expr("python_version <= '3'"); + + let eq_2 = expr("python_version == '2'"); + let eq_1 = expr("python_version == '1'"); + assert!(m().and(eq_2, eq_1).is_false()); + + assert_eq!(eq_3.not(), neq_3); + assert_eq!(eq_3, neq_3.not()); + + assert!(m().and(eq_3, neq_3).is_false()); + assert!(m().or(eq_3, neq_3).is_true()); + + assert_eq!(m().and(eq_3, geq_3), eq_3); + assert_eq!(m().and(eq_3, leq_3), eq_3); + + assert_eq!(m().and(geq_3, leq_3), eq_3); + + assert!(!m().and(geq_3, leq_3).is_false()); + assert!(m().or(geq_3, leq_3).is_true()); + } + + #[test] + fn simplify() { + let m = || INTERNER.lock(); + let x86 = expr("platform_machine == 'x86_64'"); + let not_x86 = expr("platform_machine != 'x86_64'"); + let windows = expr("platform_machine == 'Windows'"); + + let a = m().and(x86, windows); + let b = m().and(not_x86, windows); + assert_eq!(m().or(a, b), windows); + } +} diff --git a/crates/pep508-rs/src/marker/mod.rs b/crates/pep508-rs/src/marker/mod.rs index 50060c581e0f..0bf200c97810 100644 --- a/crates/pep508-rs/src/marker/mod.rs +++ b/crates/pep508-rs/src/marker/mod.rs @@ -9,12 +9,15 @@ //! outcomes. This implementation tries to carefully validate everything and emit warnings whenever //! bogus comparisons with unintended semantics are made. +mod algebra; mod environment; pub(crate) mod parse; +mod simplify; mod tree; pub use environment::{MarkerEnvironment, MarkerEnvironmentBuilder}; pub use tree::{ - ExtraOperator, MarkerExpression, MarkerOperator, MarkerTree, MarkerValue, MarkerValueString, - MarkerValueVersion, MarkerWarningKind, StringVersion, + ContainsMarkerTree, ExtraMarkerTree, ExtraOperator, InMarkerTree, MarkerExpression, + MarkerOperator, MarkerTree, MarkerTreeContents, MarkerTreeKind, MarkerValue, MarkerValueString, + MarkerValueVersion, MarkerWarningKind, StringMarkerTree, StringVersion, VersionMarkerTree, }; diff --git a/crates/pep508-rs/src/marker/parse.rs b/crates/pep508-rs/src/marker/parse.rs index 29f93bfc112f..605ff34ed53c 100644 --- a/crates/pep508-rs/src/marker/parse.rs +++ b/crates/pep508-rs/src/marker/parse.rs @@ -118,7 +118,7 @@ pub(crate) fn parse_marker_value( pub(crate) fn parse_marker_key_op_value( cursor: &mut Cursor, reporter: &mut impl Reporter, -) -> Result> { +) -> Result, Pep508Error> { cursor.eat_whitespace(); let l_value = parse_marker_value(cursor)?; cursor.eat_whitespace(); @@ -139,25 +139,14 @@ pub(crate) fn parse_marker_key_op_value( MarkerWarningKind::Pep440Error, format!( "Expected double quoted PEP 440 version to compare with {key}, - found {r_value}, will evaluate to false" + found {r_value}, will be ignored" ), ); - return Ok(MarkerExpression::arbitrary( - MarkerValue::MarkerEnvVersion(key), - operator, - r_value, - )); + return Ok(None); }; - match parse_version_expr(key.clone(), operator, &value, reporter) { - Some(expr) => expr, - None => MarkerExpression::arbitrary( - MarkerValue::MarkerEnvVersion(key), - operator, - MarkerValue::QuotedString(value), - ), - } + parse_version_expr(key.clone(), operator, &value, reporter) } // The only sound choice for this is ` ` MarkerValue::MarkerEnvString(key) => { @@ -168,24 +157,29 @@ pub(crate) fn parse_marker_key_op_value( reporter.report( MarkerWarningKind::MarkerMarkerComparison, "Comparing two markers with each other doesn't make any sense, - will evaluate to false" + will be ignored" .to_string(), ); - return Ok(MarkerExpression::arbitrary( - MarkerValue::MarkerEnvString(key), - operator, - r_value, - )); + return Ok(None); } MarkerValue::QuotedString(r_string) => r_string, }; - MarkerExpression::String { + if operator == MarkerOperator::TildeEqual { + reporter.report( + MarkerWarningKind::LexicographicComparison, + "Can't compare strings with `~=`, will be ignored".to_string(), + ); + + return Ok(None); + } + + Some(MarkerExpression::String { key, operator, value, - } + }) } // `extra == '...'` MarkerValue::Extra => { @@ -196,73 +190,46 @@ pub(crate) fn parse_marker_key_op_value( reporter.report( MarkerWarningKind::ExtraInvalidComparison, "Comparing extra with something other than a quoted string is wrong, - will evaluate to false" + will be ignored" .to_string(), ); - return Ok(MarkerExpression::arbitrary(l_value, operator, r_value)); + return Ok(None); } MarkerValue::QuotedString(value) => value, }; - match parse_extra_expr(operator, &value, reporter) { - Some(expr) => expr, - None => MarkerExpression::arbitrary( - MarkerValue::Extra, - operator, - MarkerValue::QuotedString(value), - ), - } + parse_extra_expr(operator, &value, reporter) } // This is either MarkerEnvVersion, MarkerEnvString or Extra inverted MarkerValue::QuotedString(l_string) => { match r_value { // The only sound choice for this is ` ` MarkerValue::MarkerEnvVersion(key) => { - match parse_inverted_version_expr(&l_string, operator, key.clone(), reporter) { - Some(expr) => expr, - None => MarkerExpression::arbitrary( - MarkerValue::QuotedString(l_string), - operator, - MarkerValue::MarkerEnvVersion(key), - ), - } + parse_inverted_version_expr(&l_string, operator, key.clone(), reporter) } // '...' == - MarkerValue::MarkerEnvString(key) => MarkerExpression::String { + MarkerValue::MarkerEnvString(key) => Some(MarkerExpression::String { key, // Invert the operator to normalize the expression order. operator: operator.invert(), value: l_string, - }, + }), // `'...' == extra` - MarkerValue::Extra => match parse_extra_expr(operator, &l_string, reporter) { - Some(expr) => expr, - None => MarkerExpression::arbitrary( - MarkerValue::QuotedString(l_string), - operator, - MarkerValue::Extra, - ), - }, + MarkerValue::Extra => parse_extra_expr(operator, &l_string, reporter), // `'...' == '...'`, doesn't make much sense MarkerValue::QuotedString(_) => { // Not even pypa/packaging 22.0 supports this // https://github.com/pypa/packaging/issues/632 - let expr = MarkerExpression::arbitrary( - MarkerValue::QuotedString(l_string), - operator, - r_value, - ); - reporter.report( MarkerWarningKind::StringStringComparison, format!( "Comparing two quoted strings with each other doesn't make sense: - {expr}, will evaluate to false" + '{l_string}' {operator} {r_value}, will be ignored" ), ); - expr + None } } } @@ -287,7 +254,7 @@ fn parse_version_expr( MarkerWarningKind::Pep440Error, format!( "Expected PEP 440 version to compare with {key}, found {value}, - will evaluate to false: {err}" + will be ignored: {err}" ), ); @@ -300,7 +267,7 @@ fn parse_version_expr( MarkerWarningKind::Pep440Error, format!( "Expected PEP 440 version operator to compare {key} with '{version}', - found '{marker_operator}', will evaluate to false", + found '{marker_operator}', will be ignored", version = pattern.version() ), ); @@ -342,7 +309,7 @@ fn parse_inverted_version_expr( MarkerWarningKind::Pep440Error, format!( "Expected PEP 440 version to compare with {key}, found {value}, - will evaluate to false: {err}" + will be ignored: {err}" ), ); @@ -355,7 +322,7 @@ fn parse_inverted_version_expr( MarkerWarningKind::Pep440Error, format!( "Expected PEP 440 version operator to compare {key} with '{version}', - found '{marker_operator}', will evaluate to false" + found '{marker_operator}', will be ignored" ), ); @@ -388,7 +355,7 @@ fn parse_extra_expr( Err(err) => { reporter.report( MarkerWarningKind::ExtraInvalidComparison, - format!("Expected extra name, found '{value}', will evaluate to false: {err}"), + format!("Expected extra name, found '{value}', will be ignored: {err}"), ); return None; @@ -402,9 +369,10 @@ fn parse_extra_expr( reporter.report( MarkerWarningKind::ExtraInvalidComparison, "Comparing extra with something other than a quoted string is wrong, - will evaluate to false" + will be ignored" .to_string(), ); + None } @@ -415,16 +383,14 @@ fn parse_extra_expr( fn parse_marker_expr( cursor: &mut Cursor, reporter: &mut impl Reporter, -) -> Result> { +) -> Result, Pep508Error> { cursor.eat_whitespace(); if let Some(start_pos) = cursor.eat_char('(') { let marker = parse_marker_or(cursor, reporter)?; cursor.next_expect_char(')', start_pos)?; Ok(marker) } else { - Ok(MarkerTree::Expression(parse_marker_key_op_value( - cursor, reporter, - )?)) + Ok(parse_marker_key_op_value(cursor, reporter)?.map(MarkerTree::expression)) } } @@ -435,8 +401,8 @@ fn parse_marker_expr( fn parse_marker_and( cursor: &mut Cursor, reporter: &mut impl Reporter, -) -> Result> { - parse_marker_op(cursor, "and", MarkerTree::And, parse_marker_expr, reporter) +) -> Result, Pep508Error> { + parse_marker_op(cursor, "and", MarkerTree::and, parse_marker_expr, reporter) } /// ```text @@ -446,29 +412,37 @@ fn parse_marker_and( fn parse_marker_or( cursor: &mut Cursor, reporter: &mut impl Reporter, -) -> Result> { - parse_marker_op(cursor, "or", MarkerTree::Or, parse_marker_and, reporter) +) -> Result, Pep508Error> { + parse_marker_op( + cursor, + "or", + MarkerTree::or, + |cursor, reporter| parse_marker_and(cursor, reporter), + reporter, + ) } /// Parses both `marker_and` and `marker_or` +#[allow(clippy::type_complexity)] fn parse_marker_op( cursor: &mut Cursor, op: &str, - op_constructor: fn(Vec) -> MarkerTree, - parse_inner: fn(&mut Cursor, &mut R) -> Result>, + apply: fn(&mut MarkerTree, MarkerTree), + parse_inner: fn(&mut Cursor, &mut R) -> Result, Pep508Error>, reporter: &mut R, -) -> Result> { +) -> Result, Pep508Error> { + let mut tree = None; + // marker_and or marker_expr let first_element = parse_inner(cursor, reporter)?; - // wsp* - cursor.eat_whitespace(); - // Check if we're done here instead of invoking the whole vec allocating loop - if matches!(cursor.peek_char(), None | Some(')')) { - return Ok(first_element); + + if let Some(expression) = first_element { + match tree { + Some(ref mut tree) => apply(tree, expression), + None => tree = Some(expression), + } } - let mut expressions = Vec::with_capacity(1); - expressions.push(first_element); loop { // wsp* cursor.eat_whitespace(); @@ -477,17 +451,15 @@ fn parse_marker_op( match cursor.slice(start, len) { value if value == op => { cursor.take_while(|c| !c.is_whitespace()); - let expression = parse_inner(cursor, reporter)?; - expressions.push(expression); - } - _ => { - // Build minimal trees - return if expressions.len() == 1 { - Ok(expressions.remove(0)) - } else { - Ok(op_constructor(expressions)) - }; + + if let Some(expression) = parse_inner(cursor, reporter)? { + match tree { + Some(ref mut tree) => apply(tree, expression), + None => tree = Some(expression), + } + } } + _ => return Ok(tree), } } } @@ -498,7 +470,7 @@ fn parse_marker_op( pub(crate) fn parse_markers_cursor( cursor: &mut Cursor, reporter: &mut impl Reporter, -) -> Result> { +) -> Result, Pep508Error> { let marker = parse_marker_or(cursor, reporter)?; cursor.eat_whitespace(); if let Some((pos, unexpected)) = cursor.next() { @@ -513,6 +485,7 @@ pub(crate) fn parse_markers_cursor( input: cursor.to_string(), }); }; + Ok(marker) } @@ -523,5 +496,8 @@ pub(crate) fn parse_markers( reporter: &mut impl Reporter, ) -> Result> { let mut chars = Cursor::new(markers); - parse_markers_cursor(&mut chars, reporter) + + // If the tree consisted entirely of arbitrary expressions + // that were ignored, it evaluates to true. + parse_markers_cursor(&mut chars, reporter).map(|result| result.unwrap_or(MarkerTree::TRUE)) } diff --git a/crates/pep508-rs/src/marker/simplify.rs b/crates/pep508-rs/src/marker/simplify.rs new file mode 100644 index 000000000000..c34581ceb00b --- /dev/null +++ b/crates/pep508-rs/src/marker/simplify.rs @@ -0,0 +1,361 @@ +use std::fmt; +use std::ops::Bound; + +use indexmap::IndexMap; +use itertools::Itertools; +use pep440_rs::VersionSpecifier; +use pubgrub::Range; +use rustc_hash::FxBuildHasher; + +use crate::{ExtraOperator, MarkerExpression, MarkerOperator, MarkerTree, MarkerTreeKind}; + +/// Returns a simplified DNF expression for a given marker tree. +pub(crate) fn to_dnf(tree: &MarkerTree) -> Vec> { + let mut dnf = Vec::new(); + collect_dnf(tree, &mut dnf, &mut Vec::new()); + simplify(&mut dnf); + dnf +} + +/// Walk a [`MarkerTree`] recursively and construct a DNF expression. +/// +/// A decision diagram can be converted to DNF form by walking the tree and collecting +/// all paths to a `true` terminal node. +fn collect_dnf( + tree: &MarkerTree, + dnf: &mut Vec>, + path: &mut Vec, +) { + match tree.kind() { + // Reached a `false` node, meaning the conjunction is irrelevant for DNF. + MarkerTreeKind::False => {} + // Reached a solution, store the conjunction. + MarkerTreeKind::True => { + if !path.is_empty() { + dnf.push(path.clone()); + } + } + MarkerTreeKind::Version(marker) => { + for (tree, range) in collect_edges(marker.edges()) { + // Detect whether the range for this edge can be simplified as an inequality. + if let Some(excluded) = range_inequality(&range) { + let current = path.len(); + for version in excluded { + path.push(MarkerExpression::Version { + key: marker.key().clone(), + specifier: VersionSpecifier::not_equals_version(version.clone()), + }); + } + + collect_dnf(&tree, dnf, path); + path.truncate(current); + continue; + } + + for bounds in range.iter() { + let current = path.len(); + for specifier in VersionSpecifier::from_bounds(bounds) { + path.push(MarkerExpression::Version { + key: marker.key().clone(), + specifier, + }); + } + + collect_dnf(&tree, dnf, path); + path.truncate(current); + } + } + } + MarkerTreeKind::String(marker) => { + for (tree, range) in collect_edges(marker.children()) { + // Detect whether the range for this edge can be simplified as an inequality. + if let Some(excluded) = range_inequality(&range) { + let current = path.len(); + for value in excluded { + path.push(MarkerExpression::String { + key: marker.key().clone(), + operator: MarkerOperator::NotEqual, + value: value.clone(), + }); + } + + collect_dnf(&tree, dnf, path); + path.truncate(current); + continue; + } + + for bounds in range.iter() { + let current = path.len(); + for (operator, value) in MarkerOperator::from_bounds(bounds) { + path.push(MarkerExpression::String { + key: marker.key().clone(), + operator, + value: value.clone(), + }); + } + + collect_dnf(&tree, dnf, path); + path.truncate(current); + } + } + } + MarkerTreeKind::In(marker) => { + for (value, tree) in marker.children() { + let operator = if value { + MarkerOperator::In + } else { + MarkerOperator::NotIn + }; + + let expr = MarkerExpression::String { + key: marker.key().clone(), + value: marker.value().to_owned(), + operator, + }; + + path.push(expr); + collect_dnf(&tree, dnf, path); + path.pop(); + } + } + MarkerTreeKind::Contains(marker) => { + for (value, tree) in marker.children() { + let operator = if value { + MarkerOperator::Contains + } else { + MarkerOperator::NotContains + }; + + let expr = MarkerExpression::String { + key: marker.key().clone(), + value: marker.value().to_owned(), + operator, + }; + + path.push(expr); + collect_dnf(&tree, dnf, path); + path.pop(); + } + } + MarkerTreeKind::Extra(marker) => { + for (value, tree) in marker.children() { + let operator = if value { + ExtraOperator::Equal + } else { + ExtraOperator::NotEqual + }; + + let expr = MarkerExpression::Extra { + name: marker.name().clone(), + operator, + }; + + path.push(expr); + collect_dnf(&tree, dnf, path); + path.pop(); + } + } + } +} + +/// Simplifies a DNF expression. +/// +/// A decision tree is canonical, but only for a given variable order. Depending on the +/// pre-defined order, the DNF expression produced by a decision tree can still be further +/// simplified. +/// +/// Completely simplifying a DNF expression is NP-hard and amounts to the set cover +/// problem. Additionally, marker expressions can contain complex expressions involving +/// version ranges that are not trivial to simplify. +/// +/// Instead, we choose to simplify at the boolean variable level without any truth table +/// expansion. Combined with the normalization applied by decision trees, this seems to be +/// sufficient in practice. +/// +/// Note: This function has quadratic time complexity. However, it not applied on every marker +/// operation, only to user facing output, which are typically very simple. +fn simplify(dnf: &mut Vec>) { + for i in 0..dnf.len() { + let clause = &dnf[i]; + + // Find redundant terms in this clause. + let mut redundant_terms = Vec::new(); + 'term: for (skipped, skipped_term) in clause.iter().enumerate() { + for (j, other_clause) in dnf.iter().enumerate() { + if i == j { + continue; + } + + // Let X be this clause with a given term A set to it's negation. + // If there exists another clause that is a subset of X, the term A is + // redundant in this clause. + // + // For example, `A or (not A and B)` can be simplified to `A or B`, + // eliminating the `not A` term. + if other_clause.iter().all(|term| { + if term == skipped_term { + return false; + } + + // TODO(ibraheem): if we intern variables we could reduce this + // from a linear search to an integer `HashSet` lookup + clause.contains(term) || is_negated(term, skipped_term) + }) { + redundant_terms.push(skipped); + continue 'term; + } + } + } + + // Eliminate any redundant terms. + redundant_terms.sort_by(|a, b| b.cmp(a)); + for term in redundant_terms { + dnf[i].remove(term); + } + } + + // Once we have eliminated redundant terms, there may also be redundant clauses. + // For example, `(A and B) or (not A and B)` would have been simplified above to + // `(A and B) or B` and can now be further simplified to just `B`. + let mut redundant_clauses = Vec::new(); + 'clause: for i in 0..dnf.len() { + let clause = &dnf[i]; + + for (j, other_clause) in dnf.iter().enumerate() { + // Ignore clauses that are going to be eliminated. + if i == j || redundant_clauses.contains(&j) { + continue; + } + + // There is another clause that is a subset of this one, thus this clause is redundant. + if other_clause.iter().all(|term| { + // TODO(ibraheem): if we intern variables we could reduce this + // from a linear search to an integer `HashSet` lookup + clause.contains(term) + }) { + redundant_clauses.push(i); + continue 'clause; + } + } + } + + // Eliminate any redundant clauses. + for i in redundant_clauses.into_iter().rev() { + dnf.remove(i); + } +} + +/// Merge any edges that lead to identical subtrees into a single range. +fn collect_edges<'a, T>( + map: impl ExactSizeIterator, MarkerTree)>, +) -> IndexMap, FxBuildHasher> +where + T: Ord + Clone + 'a, +{ + let len = map.len(); + + let mut paths: IndexMap<_, Range<_>, FxBuildHasher> = IndexMap::default(); + for (i, (range, tree)) in map.enumerate() { + let (mut start, mut end) = range.bounding_range().unwrap(); + match (start, end) { + (Bound::Included(v1), Bound::Included(v2)) if v1 == v2 => {} + _ => { + // Expand the range of this variable to be unbounded. + // This helps remove redundant expressions such as `python_version >= '3.7'` + // when the range has already been restricted to `['3.7', inf)` + if i == 0 { + start = Bound::Unbounded; + } + if i == len - 1 { + end = Bound::Unbounded; + } + } + } + + // Combine the ranges. + let range = Range::from_range_bounds((start.cloned(), end.cloned())); + paths + .entry(tree) + .and_modify(|union| *union = union.union(&range)) + .or_insert_with(|| range.clone()); + } + + paths +} + +/// Returns `Some` if the expression can be simplified as an inequality consisting +/// of the given values. +/// +/// For example, `os_name < 'Linux'` and `os_name > 'Linux'` can be simplified to +/// `os_name != 'Linux'`. +fn range_inequality(range: &Range) -> Option> +where + T: Ord + Clone + fmt::Debug, +{ + if range.is_empty() || range.bounding_range() != Some((Bound::Unbounded, Bound::Unbounded)) { + return None; + } + + let mut excluded = Vec::new(); + for ((_, end), (start, _)) in range.iter().tuple_windows() { + match (end, start) { + (Bound::Excluded(v1), Bound::Excluded(v2)) if v1 == v2 => excluded.push(v1), + _ => return None, + } + } + + Some(excluded) +} + +/// Returns `true` if the LHS is the negation of the RHS, or vice versa. +fn is_negated(left: &MarkerExpression, right: &MarkerExpression) -> bool { + match left { + MarkerExpression::Version { key, specifier } => { + let MarkerExpression::Version { + key: key2, + specifier: specifier2, + } = right + else { + return false; + }; + + key == key2 + && specifier.version() == specifier2.version() + && specifier + .operator() + .negate() + .is_some_and(|negated| negated == *specifier2.operator()) + } + MarkerExpression::String { + key, + operator, + value, + } => { + let MarkerExpression::String { + key: key2, + operator: operator2, + value: value2, + } = right + else { + return false; + }; + + key == key2 + && value == value2 + && operator + .negate() + .is_some_and(|negated| negated == *operator2) + } + MarkerExpression::Extra { operator, name } => { + let MarkerExpression::Extra { + name: name2, + operator: operator2, + } = right + else { + return false; + }; + + name == name2 && operator.negate() == *operator2 + } + } +} diff --git a/crates/pep508-rs/src/marker/tree.rs b/crates/pep508-rs/src/marker/tree.rs index dc3c22ef7b8c..9f93ec9b215e 100644 --- a/crates/pep508-rs/src/marker/tree.rs +++ b/crates/pep508-rs/src/marker/tree.rs @@ -1,13 +1,14 @@ use std::collections::HashSet; -use std::fmt::{Display, Formatter}; -use std::ops::Deref; +use std::fmt::{self, Display, Formatter}; +use std::ops::{Bound, Deref}; use std::str::FromStr; +use pubgrub::Range; #[cfg(feature = "pyo3")] use pyo3::{basic::CompareOp, pyclass, pymethods}; use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; -use pep440_rs::{Version, VersionParseError, VersionPattern, VersionSpecifier}; +use pep440_rs::{Version, VersionParseError, VersionSpecifier}; use uv_normalize::ExtraName; use crate::cursor::Cursor; @@ -16,6 +17,9 @@ use crate::{ MarkerEnvironment, Pep508Error, Pep508ErrorSource, Pep508Url, Reporter, TracingReporter, }; +use super::algebra::{Edges, NodeId, Variable, INTERNER}; +use super::simplify; + /// Ways in which marker evaluation can fail #[derive(Debug, Eq, Hash, Ord, PartialOrd, PartialEq, Clone, Copy)] #[cfg_attr(feature = "pyo3", pyclass(module = "pep508"))] @@ -243,11 +247,28 @@ impl MarkerOperator { } } + /// Inverts this marker operator. + pub(crate) fn invert(self) -> MarkerOperator { + match self { + Self::LessThan => Self::GreaterThan, + Self::LessEqual => Self::GreaterEqual, + Self::GreaterThan => Self::LessThan, + Self::GreaterEqual => Self::LessEqual, + Self::Equal => Self::Equal, + Self::NotEqual => Self::NotEqual, + Self::TildeEqual => Self::TildeEqual, + Self::In => Self::Contains, + Self::NotIn => Self::NotContains, + Self::Contains => Self::In, + Self::NotContains => Self::NotIn, + } + } + /// Negates this marker operator. /// /// If a negation doesn't exist, which is only the case for ~=, then this /// returns `None`. - fn negate(self) -> Option { + pub(crate) fn negate(self) -> Option { Some(match self { Self::Equal => Self::NotEqual, Self::NotEqual => Self::Equal, @@ -263,20 +284,41 @@ impl MarkerOperator { }) } - /// Inverts this marker operator. - pub(crate) fn invert(self) -> MarkerOperator { - match self { - Self::LessThan => Self::GreaterThan, - Self::LessEqual => Self::GreaterEqual, - Self::GreaterThan => Self::LessThan, - Self::GreaterEqual => Self::LessEqual, - Self::Equal => Self::Equal, - Self::NotEqual => Self::NotEqual, - Self::TildeEqual => Self::TildeEqual, - Self::In => Self::Contains, - Self::NotIn => Self::NotContains, - Self::Contains => Self::In, - Self::NotContains => Self::NotIn, + /// Returns the marker operator and value whose union represents the given range. + pub fn from_bounds( + bounds: (&Bound, &Bound), + ) -> impl Iterator { + let (b1, b2) = match bounds { + (Bound::Included(v1), Bound::Included(v2)) if v1 == v2 => { + (Some((MarkerOperator::Equal, v1.clone())), None) + } + (Bound::Excluded(v1), Bound::Excluded(v2)) if v1 == v2 => { + (Some((MarkerOperator::NotEqual, v1.clone())), None) + } + (lower, upper) => ( + MarkerOperator::from_lower_bound(lower), + MarkerOperator::from_upper_bound(upper), + ), + }; + + b1.into_iter().chain(b2) + } + + /// Returns a value specifier representing the given lower bound. + pub fn from_lower_bound(bound: &Bound) -> Option<(MarkerOperator, String)> { + match bound { + Bound::Included(value) => Some((MarkerOperator::GreaterEqual, value.clone())), + Bound::Excluded(value) => Some((MarkerOperator::GreaterThan, value.clone())), + Bound::Unbounded => None, + } + } + + /// Returns a value specifier representing the given upper bound. + pub fn from_upper_bound(bound: &Bound) -> Option<(MarkerOperator, String)> { + match bound { + Bound::Included(value) => Some((MarkerOperator::LessEqual, value.clone())), + Bound::Excluded(value) => Some((MarkerOperator::LessThan, value.clone())), + Bound::Unbounded => None, } } } @@ -407,14 +449,6 @@ pub enum MarkerExpression { operator: ExtraOperator, name: ExtraName, }, - /// An invalid or meaningless expression, such as '...' == '...'. - /// - /// Invalid expressions always evaluate to `false`, and are warned for during parsing. - Arbitrary { - l_value: MarkerValue, - operator: MarkerOperator, - r_value: MarkerValue, - }, } /// The operator for an extra expression, either '==' or '!='. @@ -427,6 +461,9 @@ pub enum ExtraOperator { } impl ExtraOperator { + /// Creates a [`ExtraOperator`] from an equivalent [`MarkerOperator`]. + /// + /// Returns `None` if the operator is not supported for extras. pub(crate) fn from_marker_operator(operator: MarkerOperator) -> Option { match operator { MarkerOperator::Equal => Some(ExtraOperator::Equal), @@ -435,7 +472,8 @@ impl ExtraOperator { } } - fn negate(&self) -> ExtraOperator { + /// Negates this operator. + pub(crate) fn negate(&self) -> ExtraOperator { match *self { ExtraOperator::Equal => ExtraOperator::NotEqual, ExtraOperator::NotEqual => ExtraOperator::Equal, @@ -454,7 +492,10 @@ impl Display for ExtraOperator { impl MarkerExpression { /// Parse a [`MarkerExpression`] from a string with the given reporter. - pub fn parse_reporter(s: &str, reporter: &mut impl Reporter) -> Result { + pub fn parse_reporter( + s: &str, + reporter: &mut impl Reporter, + ) -> Result, Pep508Error> { let mut chars = Cursor::new(s); let expression = parse::parse_marker_key_op_value(&mut chars, reporter)?; chars.eat_whitespace(); @@ -468,249 +509,16 @@ impl MarkerExpression { input: chars.to_string(), }); } - Ok(expression) - } - - /// Negates this marker expression. - /// - /// In most cases, this returns a `MarkerTree::Expression`, but in some - /// cases it can be more complicated than that. For example, the negation - /// of a compatible version constraint is a disjunction. - /// - /// Additionally, in some cases, the negation reflects the "spirit" of what - /// the marker expression is. For example, the negation of an "arbitrary" - /// expression will still result in an expression that is always false. - fn negate(&self) -> MarkerTree { - match *self { - MarkerExpression::Version { - ref key, - ref specifier, - } => { - let (op, version) = (specifier.operator(), specifier.version().clone()); - match op.negate() { - None => negate_compatible_version(key.clone(), version), - Some(op) => { - // OK because this can only fail with either local versions, - // which we avoid by construction, or if the op is ~=, which - // is never the result of negating an op. - let specifier = - VersionSpecifier::from_version(op, version.without_local()).unwrap(); - let expr = MarkerExpression::Version { - key: key.clone(), - specifier, - }; - MarkerTree::Expression(expr) - } - } - } - MarkerExpression::String { - ref key, - ref operator, - ref value, - } => { - let expr = MarkerExpression::String { - key: key.clone(), - // negating ~= doesn't make sense in this context, but - // I believe it is technically allowed, so we just leave - // it as-is. - operator: operator.negate().unwrap_or(MarkerOperator::TildeEqual), - value: value.clone(), - }; - MarkerTree::Expression(expr) - } - MarkerExpression::Extra { - ref operator, - ref name, - } => { - let expr = MarkerExpression::Extra { - operator: operator.negate(), - name: name.clone(), - }; - MarkerTree::Expression(expr) - } - // "arbitrary" expressions always return false, and while the - // negation logically implies they should always return true, we do - // not do that here because it violates the spirit of a meaningly - // or "arbitrary" marker. We flip the operator but do nothing else. - MarkerExpression::Arbitrary { - ref l_value, - ref operator, - ref r_value, - } => { - let expr = MarkerExpression::Arbitrary { - l_value: l_value.clone(), - // negating ~= doesn't make sense in this context, but - // I believe it is technically allowed, so we just leave - // it as-is. - operator: operator.negate().unwrap_or(MarkerOperator::TildeEqual), - r_value: r_value.clone(), - }; - MarkerTree::Expression(expr) - } - } - } - /// Evaluate a <`marker_value`> <`marker_op`> <`marker_value`> expression - /// - /// When `env` is `None`, all expressions that reference the environment - /// will evaluate as `true`. - fn evaluate( - &self, - env: Option<&MarkerEnvironment>, - extras: &[ExtraName], - reporter: &mut impl Reporter, - ) -> bool { - match self { - MarkerExpression::Version { key, specifier } => env - .map(|env| specifier.contains(env.get_version(key))) - .unwrap_or(true), - MarkerExpression::String { - key, - operator, - value, - } => env - .map(|env| { - let l_string = env.get_string(key); - Self::compare_strings(l_string, *operator, value, reporter) - }) - .unwrap_or(true), - MarkerExpression::Extra { - operator: ExtraOperator::Equal, - name, - } => extras.contains(name), - MarkerExpression::Extra { - operator: ExtraOperator::NotEqual, - name, - } => !extras.contains(name), - MarkerExpression::Arbitrary { .. } => false, - } + Ok(expression) } - /// Evaluates only the extras and python version part of the markers. We use this during - /// dependency resolution when we want to have packages for all possible environments but - /// already know the extras and the possible python versions (from `requires-python`) - /// - /// This considers only expression in the from `extra == '...'`, `'...' == extra`, - /// `python_version '...'` and - /// `'...' python_version`. - /// - /// Note that unlike [`Self::evaluate`] this does not perform any checks for bogus expressions but - /// will simply return true. + /// Parse a [`MarkerExpression`] from a string. /// - /// ```rust - /// # use std::collections::HashSet; - /// # use std::str::FromStr; - /// # use pep508_rs::{MarkerTree, Pep508Error}; - /// # use pep440_rs::Version; - /// # use uv_normalize::ExtraName; - /// - /// # fn main() -> Result<(), Pep508Error> { - /// let marker_tree = MarkerTree::from_str(r#"("linux" in sys_platform) and extra == 'day'"#)?; - /// let versions: Vec = (8..12).map(|minor| Version::new([3, minor])).collect(); - /// assert!(marker_tree.evaluate_extras_and_python_version(&[ExtraName::from_str("day").unwrap()].into(), &versions)); - /// assert!(!marker_tree.evaluate_extras_and_python_version(&[ExtraName::from_str("night").unwrap()].into(), &versions)); - /// - /// let marker_tree = MarkerTree::from_str(r#"extra == 'day' and python_version < '3.11' and '3.10' <= python_version"#)?; - /// assert!(!marker_tree.evaluate_extras_and_python_version(&[ExtraName::from_str("day").unwrap()].into(), &vec![Version::new([3, 9])])); - /// assert!(marker_tree.evaluate_extras_and_python_version(&[ExtraName::from_str("day").unwrap()].into(), &vec![Version::new([3, 10])])); - /// assert!(!marker_tree.evaluate_extras_and_python_version(&[ExtraName::from_str("day").unwrap()].into(), &vec![Version::new([3, 11])])); - /// # Ok(()) - /// # } - /// ``` - fn evaluate_extras_and_python_version( - &self, - extras: &HashSet, - python_versions: &[Version], - ) -> bool { - match self { - MarkerExpression::Version { - key: MarkerValueVersion::PythonVersion, - specifier, - } => python_versions - .iter() - .any(|l_version| specifier.contains(l_version)), - MarkerExpression::Extra { - operator: ExtraOperator::Equal, - name, - } => extras.contains(name), - MarkerExpression::Extra { - operator: ExtraOperator::NotEqual, - name, - } => !extras.contains(name), - _ => true, - } - } - - /// Compare strings by PEP 508 logic, with warnings - fn compare_strings( - l_string: &str, - operator: MarkerOperator, - r_string: &str, - reporter: &mut impl Reporter, - ) -> bool { - match operator { - MarkerOperator::Equal => l_string == r_string, - MarkerOperator::NotEqual => l_string != r_string, - MarkerOperator::GreaterThan => { - reporter.report( - MarkerWarningKind::LexicographicComparison, - format!("Comparing {l_string} and {r_string} lexicographically"), - ); - l_string > r_string - } - MarkerOperator::GreaterEqual => { - reporter.report( - MarkerWarningKind::LexicographicComparison, - format!("Comparing {l_string} and {r_string} lexicographically"), - ); - l_string >= r_string - } - MarkerOperator::LessThan => { - reporter.report( - MarkerWarningKind::LexicographicComparison, - format!("Comparing {l_string} and {r_string} lexicographically"), - ); - l_string < r_string - } - MarkerOperator::LessEqual => { - reporter.report( - MarkerWarningKind::LexicographicComparison, - format!("Comparing {l_string} and {r_string} lexicographically"), - ); - l_string <= r_string - } - MarkerOperator::TildeEqual => { - reporter.report( - MarkerWarningKind::LexicographicComparison, - format!("Can't compare {l_string} and {r_string} with `~=`"), - ); - false - } - MarkerOperator::In => r_string.contains(l_string), - MarkerOperator::NotIn => !r_string.contains(l_string), - MarkerOperator::Contains => l_string.contains(r_string), - MarkerOperator::NotContains => !l_string.contains(r_string), - } - } - - /// Creates an instance of [`MarkerExpression::Arbitrary`] with the given values. - pub(crate) fn arbitrary( - l_value: MarkerValue, - operator: MarkerOperator, - r_value: MarkerValue, - ) -> MarkerExpression { - MarkerExpression::Arbitrary { - l_value, - operator, - r_value, - } - } -} - -impl FromStr for MarkerExpression { - type Err = Pep508Error; - - fn from_str(s: &str) -> Result { + /// Returns `None` if the expression consists entirely of meaningless expressions + /// that are ignored, such as `os_name ~= 'foo'`. + #[allow(clippy::should_implement_trait)] + pub fn from_str(s: &str) -> Result, Pep508Error> { MarkerExpression::parse_reporter(s, &mut TracingReporter) } } @@ -743,29 +551,18 @@ impl Display for MarkerExpression { MarkerExpression::Extra { operator, name } => { write!(f, "extra {operator} '{name}'") } - MarkerExpression::Arbitrary { - l_value, - operator, - r_value, - } => { - write!(f, "{l_value} {operator} {r_value}") - } } } } -/// Represents one of the nested marker expressions with and/or/parentheses -#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] -pub enum MarkerTree { - /// A simple expression such as `python_version > "3.8"` - Expression(MarkerExpression), - /// An and between nested expressions, such as - /// `python_version > "3.8" and implementation_name == 'cpython'` - And(Vec), - /// An or between nested expressions, such as - /// `python_version > "3.8" or implementation_name == 'cpython'` - Or(Vec), -} +/// Represents one or more nested marker expressions with and/or/parentheses. +/// +/// Marker trees are canonical, meaning any two functionally equivalent markers +/// will compare equally. Markers also support efficient polynomial-time operations, +/// such as conjunction and disjunction. +// TODO(ibraheem): decide whether we want to implement `Copy` for marker trees +#[derive(Clone, Eq, Hash, PartialEq, PartialOrd, Ord)] +pub struct MarkerTree(NodeId); impl<'de> Deserialize<'de> for MarkerTree { fn deserialize(deserializer: D) -> Result @@ -777,15 +574,6 @@ impl<'de> Deserialize<'de> for MarkerTree { } } -impl Serialize for MarkerTree { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_str(&self.to_string()) - } -} - impl FromStr for MarkerTree { type Err = Pep508Error; @@ -808,141 +596,187 @@ impl MarkerTree { parse::parse_markers(markers, reporter) } - /// Whether the marker is `MarkerTree::And(Vec::new())`. - pub fn is_universal(&self) -> bool { - self == &MarkerTree::And(Vec::new()) - } + /// An empty marker that always evaluates to `true`. + pub const TRUE: MarkerTree = MarkerTree(NodeId::TRUE); - /// Does this marker apply in the given environment? - pub fn evaluate(&self, env: &MarkerEnvironment, extras: &[ExtraName]) -> bool { - self.evaluate_optional_environment(Some(env), extras) + /// An unsatisfiable marker that always evaluates to `false`. + pub const FALSE: MarkerTree = MarkerTree(NodeId::FALSE); + + /// Returns a marker tree for a single expression. + pub fn expression(expr: MarkerExpression) -> MarkerTree { + MarkerTree(INTERNER.lock().expression(expr)) } - /// Evaluates this marker tree against an optional environment and a - /// possibly empty sequence of extras. + /// Whether the marker always evaluates to `true`. /// - /// When an environment is not provided, all marker expressions based on - /// the environment evaluate to `true`. That is, this provides environment - /// independent marker evaluation. In practice, this means only the extras - /// are evaluated when an environment is not provided. - pub fn evaluate_optional_environment( - &self, - env: Option<&MarkerEnvironment>, - extras: &[ExtraName], - ) -> bool { - self.report_deprecated_options(&mut TracingReporter); - match self { - Self::Expression(expression) => expression.evaluate(env, extras, &mut TracingReporter), - Self::And(expressions) => expressions - .iter() - .all(|x| x.evaluate_reporter_impl(env, extras, &mut TracingReporter)), - Self::Or(expressions) => expressions - .iter() - .any(|x| x.evaluate_reporter_impl(env, extras, &mut TracingReporter)), - } + /// If this method returns `true`, it is definitively known that the marker will + /// evaluate to `true` in any environment. However, this method may return false + /// negatives, i.e. it may not be able to detect that a marker is always true for + /// complex expressions. + pub fn is_true(&self) -> bool { + self.0.is_true() } - /// Remove the extras from a marker, returning `None` if the marker tree evaluates to `true`. + /// Whether the marker always evaluates to `false`, i.e. the expression is not + /// satisfiable in any environment. /// - /// Any `extra` markers that are always `true` given the provided extras will be removed. - /// Any `extra` markers that are always `false` given the provided extras will be left - /// unchanged. + /// If this method returns `true`, it is definitively known that the marker will + /// evaluate to `false` in any environment. However, this method may return false + /// negatives, i.e. it may not be able to detect that a marker is unsatisfiable + /// for complex expressions. + pub fn is_false(&self) -> bool { + self.0.is_false() + } + + /// Returns a new marker tree that is the negation of this one. + #[must_use] + pub fn negate(&self) -> MarkerTree { + MarkerTree(self.0.not()) + } + + /// Combine this marker tree with the one given via a conjunction. + #[allow(clippy::needless_pass_by_value)] + pub fn and(&mut self, tree: MarkerTree) { + self.0 = INTERNER.lock().and(self.0, tree.0); + } + + /// Combine this marker tree with the one given via a disjunction. + #[allow(clippy::needless_pass_by_value)] + pub fn or(&mut self, tree: MarkerTree) { + self.0 = INTERNER.lock().or(self.0, tree.0); + } + + /// Returns `true` if there is no environment in which both marker trees can both apply, + /// i.e. the expression `first and second` is always false. /// - /// For example, if `dev` is a provided extra, given `sys_platform == 'linux' and extra == 'dev'`, - /// the marker will be simplified to `sys_platform == 'linux'`. - pub fn simplify_extras(self, extras: &[ExtraName]) -> Option { - self.simplify_extras_with(|name| extras.contains(name)) + /// If this method returns `true`, it is definitively known that the two markers can + /// never both evaluate to `true` in a given environment. However, this method may return + /// false negatives, i.e. it may not be able to detect that two markers are disjoint for + /// complex expressions. + pub fn is_disjoint(&self, tree: &MarkerTree) -> bool { + INTERNER.lock().and(self.0, tree.0).is_false() } - /// Remove the extras from a marker, returning `None` if the marker tree evaluates to `true`. + /// Returns the contents of this marker tree, if it contains at least one expression. /// - /// Any `extra` markers that are always `true` given the provided predicate will be removed. - /// Any `extra` markers that are always `false` given the provided predicate will be left - /// unchanged. + /// If the marker is `true`, this method will return `None`. + /// If the marker is `false`, the marker is represented as the normalized expression, `python_version < '0'`. /// - /// For example, if `is_extra('dev')` is true, given - /// `sys_platform == 'linux' and extra == 'dev'`, the marker will be simplified to - /// `sys_platform == 'linux'`. - pub fn simplify_extras_with(self, is_extra: impl Fn(&ExtraName) -> bool) -> Option { - // Because `simplify_extras_with_impl` is recursive, and we need to use - // our predicate in recursive calls, we need the predicate itself to - // have some indirection (or else we'd have to clone it). To avoid a - // recursive type at codegen time, we just introduce the indirection - // here, but keep the calling API ergonomic. - self.simplify_extras_with_impl(&is_extra) + /// The returned type implements [`Display`] and [`serde::Serialize`]. + pub fn contents(&self) -> Option { + if self.is_true() { + return None; + } + + Some(MarkerTreeContents(self.clone())) } - fn simplify_extras_with_impl( - self, - is_extra: &impl Fn(&ExtraName) -> bool, - ) -> Option { - /// Returns `true` if the given expression is always `true` given the set of extras. - fn is_true(expression: &MarkerExpression, is_extra: impl Fn(&ExtraName) -> bool) -> bool { - match expression { - MarkerExpression::Extra { - operator: ExtraOperator::Equal, - name, - } => is_extra(name), - MarkerExpression::Extra { - operator: ExtraOperator::NotEqual, - name, - } => !is_extra(name), - _ => false, - } + /// Returns a simplified string representation of this marker, if it contains at least one + /// expression. + /// + /// If the marker is `true`, this method will return `None`. + /// If the marker is `false`, the marker is represented as the normalized expression, `python_version < '0'`. + pub fn try_to_string(&self) -> Option { + self.contents().map(|contents| contents.to_string()) + } + + /// Returns the underlying [`MarkerTreeKind`] of the root node. + pub fn kind(&self) -> MarkerTreeKind<'_> { + if self.is_true() { + return MarkerTreeKind::True; } - match self { - Self::Expression(expression) => { - // If the expression is true, we can remove the marker entirely. - if is_true(&expression, is_extra) { - None - } else { - // If not, return the original marker. - Some(Self::Expression(expression)) - } + if self.is_false() { + return MarkerTreeKind::False; + } + + let node = INTERNER.shared.node(self.0); + match &node.var { + Variable::Version(key) => { + let Edges::Version { edges: ref map } = node.children else { + unreachable!() + }; + MarkerTreeKind::Version(VersionMarkerTree { + id: self.0, + key: key.clone(), + map, + }) } - Self::And(expressions) => { - // Remove any expressions that are _true_ due to the presence of an extra. - let simplified = expressions - .into_iter() - .filter_map(|marker| marker.simplify_extras_with_impl(is_extra)) - .collect::>(); - - // If there are no expressions left, return None. - if simplified.is_empty() { - None - } else if simplified.len() == 1 { - // If there is only one expression left, return the remaining expression. - simplified.into_iter().next() - } else { - // If there are still expressions left, return the simplified marker. - Some(Self::And(simplified)) - } + Variable::String(key) => { + let Edges::String { edges: ref map } = node.children else { + unreachable!() + }; + MarkerTreeKind::String(StringMarkerTree { + id: self.0, + key: key.clone(), + map, + }) } - Self::Or(expressions) => { - let num_expressions = expressions.len(); - - // Remove any expressions that are _true_ due to the presence of an extra. - let simplified = expressions - .into_iter() - .filter_map(|marker| marker.simplify_extras_with_impl(is_extra)) - .collect::>(); - - // If _any_ of the expressions are true (i.e., if any of the markers were filtered - // out in the above filter step), the entire marker is true. - if simplified.len() < num_expressions { - None - } else if simplified.len() == 1 { - // If there is only one expression left, return the remaining expression. - simplified.into_iter().next() - } else { - // If there are still expressions left, return the simplified marker. - Some(Self::Or(simplified)) - } + Variable::In { key, value } => { + let Edges::Boolean { low, high } = node.children else { + unreachable!() + }; + MarkerTreeKind::In(InMarkerTree { + key: key.clone(), + value, + high: high.negate(self.0), + low: low.negate(self.0), + }) + } + Variable::Contains { key, value } => { + let Edges::Boolean { low, high } = node.children else { + unreachable!() + }; + MarkerTreeKind::Contains(ContainsMarkerTree { + key: key.clone(), + value, + high: high.negate(self.0), + low: low.negate(self.0), + }) + } + Variable::Extra(name) => { + let Edges::Boolean { low, high } = node.children else { + unreachable!() + }; + MarkerTreeKind::Extra(ExtraMarkerTree { + name, + high: high.negate(self.0), + low: low.negate(self.0), + }) } } } + /// Returns a simplified DNF expression for this marker tree. + pub fn to_dnf(&self) -> Vec> { + simplify::to_dnf(self) + } + + /// Does this marker apply in the given environment? + pub fn evaluate(&self, env: &MarkerEnvironment, extras: &[ExtraName]) -> bool { + self.report_deprecated_options(&mut TracingReporter); + self.evaluate_reporter_impl(env, extras, &mut TracingReporter) + } + + /// Evaluates this marker tree against an optional environment and a + /// possibly empty sequence of extras. + /// + /// When an environment is not provided, all marker expressions based on + /// the environment evaluate to `true`. That is, this provides environment + /// independent marker evaluation. In practice, this means only the extras + /// are evaluated when an environment is not provided. + pub fn evaluate_optional_environment( + &self, + env: Option<&MarkerEnvironment>, + extras: &[ExtraName], + ) -> bool { + self.report_deprecated_options(&mut TracingReporter); + match env { + None => self.evaluate_extras(extras), + Some(env) => self.evaluate_reporter_impl(env, extras, &mut TracingReporter), + } + } + /// Same as [`Self::evaluate`], but instead of using logging to warn, you can pass your own /// handler for warnings pub fn evaluate_reporter( @@ -952,24 +786,72 @@ impl MarkerTree { reporter: &mut impl Reporter, ) -> bool { self.report_deprecated_options(reporter); - self.evaluate_reporter_impl(Some(env), extras, reporter) + self.evaluate_reporter_impl(env, extras, reporter) } fn evaluate_reporter_impl( &self, - env: Option<&MarkerEnvironment>, + env: &MarkerEnvironment, extras: &[ExtraName], reporter: &mut impl Reporter, ) -> bool { - match self { - Self::Expression(expression) => expression.evaluate(env, extras, reporter), - Self::And(expressions) => expressions - .iter() - .all(|x| x.evaluate_reporter_impl(env, extras, reporter)), - Self::Or(expressions) => expressions - .iter() - .any(|x| x.evaluate_reporter_impl(env, extras, reporter)), + match self.kind() { + MarkerTreeKind::True => return true, + MarkerTreeKind::False => return false, + MarkerTreeKind::Version(marker) => { + for (range, tree) in marker.edges() { + if range.contains(env.get_version(marker.key())) { + return tree.evaluate_reporter_impl(env, extras, reporter); + } + } + } + MarkerTreeKind::String(marker) => { + for (range, tree) in marker.children() { + let l_string = env.get_string(marker.key()); + + if range.as_singleton().is_none() { + if let Some((start, end)) = range.bounding_range() { + if let Bound::Included(value) | Bound::Excluded(value) = start { + reporter.report( + MarkerWarningKind::LexicographicComparison, + format!("Comparing {l_string} and {value} lexicographically"), + ); + }; + + if let Bound::Included(value) | Bound::Excluded(value) = end { + reporter.report( + MarkerWarningKind::LexicographicComparison, + format!("Comparing {l_string} and {value} lexicographically"), + ); + }; + } + } + + // todo(ibraheem): avoid cloning here, `contains` should accept `&impl Borrow` + let l_string = &l_string.to_string(); + if range.contains(l_string) { + return tree.evaluate_reporter_impl(env, extras, reporter); + } + } + } + MarkerTreeKind::In(marker) => { + return marker + .edge(marker.value().contains(env.get_string(marker.key()))) + .evaluate_reporter_impl(env, extras, reporter); + } + MarkerTreeKind::Contains(marker) => { + return marker + .edge(env.get_string(marker.key()).contains(marker.value())) + .evaluate_reporter_impl(env, extras, reporter); + } + MarkerTreeKind::Extra(marker) => { + return marker + .edge(extras.contains(marker.name())) + .evaluate_reporter_impl(env, extras, reporter); + } } + + false } /// Checks if the requirement should be activated with the given set of active extras and a set @@ -985,16 +867,58 @@ impl MarkerTree { extras: &HashSet, python_versions: &[Version], ) -> bool { - match self { - Self::Expression(expression) => { - expression.evaluate_extras_and_python_version(extras, python_versions) + match self.kind() { + MarkerTreeKind::True => true, + MarkerTreeKind::False => false, + MarkerTreeKind::Version(marker) => marker.edges().any(|(range, tree)| { + if *marker.key() == MarkerValueVersion::PythonVersion { + if !python_versions + .iter() + .any(|version| range.contains(version)) + { + return false; + } + } + + tree.evaluate_extras_and_python_version(extras, python_versions) + }), + MarkerTreeKind::String(marker) => marker + .children() + .any(|(_, tree)| tree.evaluate_extras_and_python_version(extras, python_versions)), + MarkerTreeKind::In(marker) => marker + .children() + .any(|(_, tree)| tree.evaluate_extras_and_python_version(extras, python_versions)), + MarkerTreeKind::Contains(marker) => marker + .children() + .any(|(_, tree)| tree.evaluate_extras_and_python_version(extras, python_versions)), + MarkerTreeKind::Extra(marker) => marker + .edge(extras.contains(marker.name())) + .evaluate_extras_and_python_version(extras, python_versions), + } + } + + /// Checks if the requirement should be activated with the given set of active extras without evaluating + /// the remaining environment markers, i.e. if there is potentially an environment that could activate this + /// requirement. + pub fn evaluate_extras(&self, extras: &[ExtraName]) -> bool { + match self.kind() { + MarkerTreeKind::True => true, + MarkerTreeKind::False => false, + MarkerTreeKind::Version(marker) => { + marker.edges().any(|(_, tree)| tree.evaluate_extras(extras)) } - Self::And(expressions) => expressions - .iter() - .all(|x| x.evaluate_extras_and_python_version(extras, python_versions)), - Self::Or(expressions) => expressions - .iter() - .any(|x| x.evaluate_extras_and_python_version(extras, python_versions)), + MarkerTreeKind::String(marker) => marker + .children() + .any(|(_, tree)| tree.evaluate_extras(extras)), + MarkerTreeKind::In(marker) => marker + .children() + .any(|(_, tree)| tree.evaluate_extras(extras)), + MarkerTreeKind::Contains(marker) => marker + .children() + .any(|(_, tree)| tree.evaluate_extras(extras)), + MarkerTreeKind::Extra(marker) => marker + .edge(extras.contains(marker.name())) + .evaluate_extras(extras), } } @@ -1010,258 +934,451 @@ impl MarkerTree { warnings.push((kind, warning)); }; self.report_deprecated_options(&mut reporter); - let result = self.evaluate_reporter_impl(Some(env), extras, &mut reporter); + let result = self.evaluate_reporter_impl(env, extras, &mut reporter); (result, warnings) } /// Report the deprecated marker from fn report_deprecated_options(&self, reporter: &mut impl Reporter) { - match self { - Self::Expression(expression) => { - let MarkerExpression::String { key, .. } = expression else { - return; - }; - - match key { - MarkerValueString::OsNameDeprecated => { - reporter.report( - MarkerWarningKind::DeprecatedMarkerName, - "os.name is deprecated in favor of os_name".to_string(), - ); - } - MarkerValueString::PlatformMachineDeprecated => { - reporter.report( - MarkerWarningKind::DeprecatedMarkerName, - "platform.machine is deprecated in favor of platform_machine" - .to_string(), - ); - } - MarkerValueString::PlatformPythonImplementationDeprecated => { - reporter.report( - MarkerWarningKind::DeprecatedMarkerName, - "platform.python_implementation is deprecated in favor of platform_python_implementation".to_string(), - ); - } - MarkerValueString::PythonImplementationDeprecated => { - reporter.report( - MarkerWarningKind::DeprecatedMarkerName, - "python_implementation is deprecated in favor of platform_python_implementation".to_string(), - ); - } - MarkerValueString::PlatformVersionDeprecated => { - reporter.report( - MarkerWarningKind::DeprecatedMarkerName, - "platform.version is deprecated in favor of platform_version" - .to_string(), - ); - } - MarkerValueString::SysPlatformDeprecated => { - reporter.report( - MarkerWarningKind::DeprecatedMarkerName, - "sys.platform is deprecated in favor of sys_platform".to_string(), - ); - } - _ => {} + let string_marker = match self.kind() { + MarkerTreeKind::True | MarkerTreeKind::False => return, + MarkerTreeKind::String(marker) => marker, + MarkerTreeKind::Version(marker) => { + for (_, tree) in marker.edges() { + tree.report_deprecated_options(reporter); } + return; } - Self::And(expressions) => { - for expression in expressions { - expression.report_deprecated_options(reporter); + MarkerTreeKind::In(marker) => { + for (_, tree) in marker.children() { + tree.report_deprecated_options(reporter); } + return; } - Self::Or(expressions) => { - for expression in expressions { - expression.report_deprecated_options(reporter); + MarkerTreeKind::Contains(marker) => { + for (_, tree) in marker.children() { + tree.report_deprecated_options(reporter); } + return; } - } - } - - /// Returns a new marker tree that is the negation of this one. - #[must_use] - pub fn negate(&self) -> MarkerTree { - match *self { - MarkerTree::Expression(ref expr) => expr.negate(), - MarkerTree::And(ref trees) => { - let mut negated = MarkerTree::Or(Vec::with_capacity(trees.len())); - for tree in trees { - negated.or(tree.negate()); + MarkerTreeKind::Extra(marker) => { + for (_, tree) in marker.children() { + tree.report_deprecated_options(reporter); } - negated + return; } - MarkerTree::Or(ref trees) => { - let mut negated = MarkerTree::And(Vec::with_capacity(trees.len())); - for tree in trees { - negated.and(tree.negate()); - } - negated + }; + + match string_marker.key() { + MarkerValueString::OsNameDeprecated => { + reporter.report( + MarkerWarningKind::DeprecatedMarkerName, + "os.name is deprecated in favor of os_name".to_string(), + ); } + MarkerValueString::PlatformMachineDeprecated => { + reporter.report( + MarkerWarningKind::DeprecatedMarkerName, + "platform.machine is deprecated in favor of platform_machine".to_string(), + ); + } + MarkerValueString::PlatformPythonImplementationDeprecated => { + reporter.report( + MarkerWarningKind::DeprecatedMarkerName, + "platform.python_implementation is deprecated in favor of + platform_python_implementation" + .to_string(), + ); + } + MarkerValueString::PythonImplementationDeprecated => { + reporter.report( + MarkerWarningKind::DeprecatedMarkerName, + "python_implementation is deprecated in favor of + platform_python_implementation" + .to_string(), + ); + } + MarkerValueString::PlatformVersionDeprecated => { + reporter.report( + MarkerWarningKind::DeprecatedMarkerName, + "platform.version is deprecated in favor of platform_version".to_string(), + ); + } + MarkerValueString::SysPlatformDeprecated => { + reporter.report( + MarkerWarningKind::DeprecatedMarkerName, + "sys.platform is deprecated in favor of sys_platform".to_string(), + ); + } + _ => {} + } + + for (_, tree) in string_marker.children() { + tree.report_deprecated_options(reporter); } } - /// Combine this marker tree with the one given via a conjunction. + /// Find a top level `extra == "..."` expression. /// - /// This does some shallow flattening. That is, if `self` is a conjunction - /// already, then `tree` is added to it instead of creating a new - /// conjunction. - pub fn and(&mut self, tree: MarkerTree) { - if self == &tree { - return; - } - match *self { - MarkerTree::Expression(_) | MarkerTree::Or(_) => { - let this = std::mem::replace(self, MarkerTree::And(vec![])); - *self = MarkerTree::And(vec![this]); + /// ASSUMPTION: There is one `extra = "..."`, and it's either the only marker or part of the + /// main conjunction. + pub fn top_level_extra(&self) -> Option { + let mut extra_expression = None; + for conjunction in self.to_dnf() { + let found = conjunction.iter().find(|expression| { + matches!( + expression, + MarkerExpression::Extra { + operator: ExtraOperator::Equal, + .. + } + ) + })?; + + // Because the marker tree is in DNF form, we must verify that the extra expression is part + // of all solutions to this marker. + if let Some(ref extra_expression) = extra_expression { + if *extra_expression != *found { + return None; + } + + continue; } - MarkerTree::And(_) => {} + + extra_expression = Some(found.clone()); } - if let MarkerTree::And(ref mut exprs) = *self { - if let MarkerTree::And(tree) = tree { - exprs.extend(tree); - } else { - exprs.push(tree); - } - if exprs.len() == 1 { - *self = exprs.pop().unwrap(); + + extra_expression + } + + /// Remove the extras from a marker, returning `None` if the marker tree evaluates to `true`. + /// + /// Any `extra` markers that are always `true` given the provided extras will be removed. + /// Any `extra` markers that are always `false` given the provided extras will be left + /// unchanged. + /// + /// For example, if `dev` is a provided extra, given `sys_platform == 'linux' and extra == 'dev'`, + /// the marker will be simplified to `sys_platform == 'linux'`. + #[must_use] + #[allow(clippy::needless_pass_by_value)] + pub fn simplify_python_versions( + self, + python_version: Range, + full_python_version: Range, + ) -> MarkerTree { + MarkerTree(INTERNER.lock().restrict_versions(self.0, &|var| match var { + Variable::Version(MarkerValueVersion::PythonVersion) => Some(python_version.clone()), + Variable::Version(MarkerValueVersion::PythonFullVersion) => { + Some(full_python_version.clone()) } - } + _ => None, + })) } - /// Combine this marker tree with the one given via a disjunction. + /// Remove the extras from a marker, returning `None` if the marker tree evaluates to `true`. /// - /// This does some shallow flattening. That is, if `self` is a disjunction - /// already, then `tree` is added to it instead of creating a new - /// disjunction. - pub fn or(&mut self, tree: MarkerTree) { - if self == &tree { - return; + /// Any `extra` markers that are always `true` given the provided extras will be removed. + /// Any `extra` markers that are always `false` given the provided extras will be left + /// unchanged. + /// + /// For example, if `dev` is a provided extra, given `sys_platform == 'linux' and extra == 'dev'`, + /// the marker will be simplified to `sys_platform == 'linux'`. + #[must_use] + pub fn simplify_extras(self, extras: &[ExtraName]) -> MarkerTree { + self.simplify_extras_with(|name| extras.contains(name)) + } + + /// Remove the extras from a marker, returning `None` if the marker tree evaluates to `true`. + /// + /// Any `extra` markers that are always `true` given the provided predicate will be removed. + /// Any `extra` markers that are always `false` given the provided predicate will be left + /// unchanged. + /// + /// For example, if `is_extra('dev')` is true, given + /// `sys_platform == 'linux' and extra == 'dev'`, the marker will be simplified to + /// `sys_platform == 'linux'`. + #[must_use] + pub fn simplify_extras_with(self, is_extra: impl Fn(&ExtraName) -> bool) -> MarkerTree { + // Because `simplify_extras_with_impl` is recursive, and we need to use + // our predicate in recursive calls, we need the predicate itself to + // have some indirection (or else we'd have to clone it). To avoid a + // recursive type at codegen time, we just introduce the indirection + // here, but keep the calling API ergonomic. + self.simplify_extras_with_impl(&is_extra) + } + + fn simplify_extras_with_impl(self, is_extra: &impl Fn(&ExtraName) -> bool) -> MarkerTree { + MarkerTree(INTERNER.lock().restrict(self.0, &|var| match var { + Variable::Extra(name) => is_extra(name).then_some(true), + _ => None, + })) + } +} + +impl fmt::Debug for MarkerTree { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + if self.is_true() { + return write!(f, "true"); } - match *self { - MarkerTree::Expression(_) | MarkerTree::And(_) => { - let this = std::mem::replace(self, MarkerTree::And(vec![])); - *self = MarkerTree::Or(vec![this]); - } - MarkerTree::Or(_) => {} + + if self.is_false() { + return write!(f, "false"); } - if let MarkerTree::Or(ref mut exprs) = *self { - if let MarkerTree::Or(tree) = tree { - exprs.extend(tree); - } else { - exprs.push(tree); - } - if exprs.len() == 1 { - *self = exprs.pop().unwrap(); - } + + write!(f, "{}", self.contents().unwrap()) + } +} + +/// The underlying kind of an arbitrary node in a [`MarkerTree`]. +/// +/// A marker tree is represented as an algebraic decision tree with two terminal nodes +/// `True` or `False`. The edges of a given node correspond to a particular assignment of +/// a value to that variable. +#[derive(PartialEq, Eq, Clone, Debug)] +pub enum MarkerTreeKind<'a> { + /// An empty marker that always evaluates to `true`. + True, + /// An unsatisfiable marker that always evaluates to `false`. + False, + /// A version expression. + Version(VersionMarkerTree<'a>), + /// A string expression. + String(StringMarkerTree<'a>), + /// A string expression with the `in` operator. + In(InMarkerTree<'a>), + /// A string expression with the `contains` operator. + Contains(ContainsMarkerTree<'a>), + /// A string expression. + Extra(ExtraMarkerTree<'a>), +} + +/// A version marker node, such as `python_version < '3.7'`. +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct VersionMarkerTree<'a> { + id: NodeId, + key: MarkerValueVersion, + map: &'a [(Range, NodeId)], +} + +impl VersionMarkerTree<'_> { + /// The key for this node. + pub fn key(&self) -> &MarkerValueVersion { + &self.key + } + + /// The edges of this node, corresponding to possible output ranges of the given variable. + pub fn edges(&self) -> impl ExactSizeIterator, MarkerTree)> + '_ { + self.map + .iter() + .map(|(range, node)| (range, MarkerTree(node.negate(self.id)))) + } +} + +/// A string marker node, such as `os_name == 'Linux'`. +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct StringMarkerTree<'a> { + id: NodeId, + key: MarkerValueString, + map: &'a [(Range, NodeId)], +} + +impl StringMarkerTree<'_> { + /// The key for this node. + pub fn key(&self) -> &MarkerValueString { + &self.key + } + + /// The edges of this node, corresponding to possible output ranges of the given variable. + pub fn children(&self) -> impl ExactSizeIterator, MarkerTree)> { + self.map + .iter() + .map(|(range, node)| (range, MarkerTree(node.negate(self.id)))) + } +} + +/// A string marker node with the `in` operator, such as `os_name in 'WindowsLinux'`. +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct InMarkerTree<'a> { + key: MarkerValueString, + value: &'a str, + high: NodeId, + low: NodeId, +} + +impl InMarkerTree<'_> { + /// The key (LHS) for this expression. + pub fn key(&self) -> &MarkerValueString { + &self.key + } + + /// The value (RHS) for this expression. + pub fn value(&self) -> &str { + self.value + } + + /// The edges of this node, corresponding to the boolean evaluation of the expression. + pub fn children(&self) -> impl Iterator { + [(true, MarkerTree(self.high)), (false, MarkerTree(self.low))].into_iter() + } + + /// Returns the subtree associated with the given edge value. + pub fn edge(&self, value: bool) -> MarkerTree { + if value { + MarkerTree(self.high) + } else { + MarkerTree(self.low) } } +} - /// Find a top level `extra == "..."` expression. - /// - /// ASSUMPTION: There is one `extra = "..."`, and it's either the only marker or part of the - /// main conjunction. - pub fn top_level_extra(&self) -> Option<&MarkerExpression> { - match &self { - MarkerTree::Expression(extra_expression @ MarkerExpression::Extra { .. }) => { - Some(extra_expression) - } - MarkerTree::And(and) => and.iter().find_map(|marker| { - if let MarkerTree::Expression(extra_expression @ MarkerExpression::Extra { .. }) = - marker - { - Some(extra_expression) - } else { - None - } - }), - MarkerTree::Expression(_) | MarkerTree::Or(_) => None, +/// A string marker node with inverse of the `in` operator, such as `'nux' in os_name`. +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct ContainsMarkerTree<'a> { + key: MarkerValueString, + value: &'a str, + high: NodeId, + low: NodeId, +} + +impl ContainsMarkerTree<'_> { + /// The key (LHS) for this expression. + pub fn key(&self) -> &MarkerValueString { + &self.key + } + + /// The value (RHS) for this expression. + pub fn value(&self) -> &str { + self.value + } + + /// The edges of this node, corresponding to the boolean evaluation of the expression. + pub fn children(&self) -> impl Iterator { + [(true, MarkerTree(self.high)), (false, MarkerTree(self.low))].into_iter() + } + + /// Returns the subtree associated with the given edge value. + pub fn edge(&self, value: bool) -> MarkerTree { + if value { + MarkerTree(self.high) + } else { + MarkerTree(self.low) } } } -impl Display for MarkerTree { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let format_inner = |expression: &Self| { - if matches!(expression, Self::Expression(_)) { - format!("{expression}") - } else { - format!("({expression})") - } - }; - match self { - Self::Expression(expression) => write!(f, "{expression}"), - Self::And(and_list) => f.write_str( - &and_list - .iter() - .map(format_inner) - .collect::>() - .join(" and "), - ), - Self::Or(or_list) => f.write_str( - &or_list - .iter() - .map(format_inner) - .collect::>() - .join(" or "), - ), +/// A node representing the existence or absence of a given extra, such as `extra == 'bar'`. +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct ExtraMarkerTree<'a> { + name: &'a ExtraName, + high: NodeId, + low: NodeId, +} + +impl ExtraMarkerTree<'_> { + /// Returns the name of the extra in this expression. + pub fn name(&self) -> &ExtraName { + self.name + } + + /// The edges of this node, corresponding to the boolean evaluation of the expression. + pub fn children(&self) -> impl Iterator { + [(true, MarkerTree(self.high)), (false, MarkerTree(self.low))].into_iter() + } + + /// Returns the subtree associated with the given edge value. + pub fn edge(&self, value: bool) -> MarkerTree { + if value { + MarkerTree(self.high) + } else { + MarkerTree(self.low) } } } -/// Negates a compatible version marker expression, from its component parts. +/// A marker tree that contains at least one expression. /// -/// Here, we consider `key ~= V.N` to be equivalent to -/// `key >= V.N and key == V.*`. So the negation returned is -/// `key < V.N or key != V.*`. -fn negate_compatible_version(key: MarkerValueVersion, version: Version) -> MarkerTree { - assert!( - version.release().len() > 1, - "~= requires more than 1 release version number" - ); - // I believe we're already guaranteed that this is true, - // because we're only here if this version was combined - // with ~=, which cannot be used with local versions anyway. - // But this ensures correctness and should be pretty cheap. - let version = version.without_local(); - let pattern = VersionPattern::wildcard(Version::new( - &version.release()[..version.release().len() - 1], - )); - // OK because this can only fail for local versions or when using - // ~=, but neither is the case here. - let disjunct1 = VersionSpecifier::from_version(pep440_rs::Operator::LessThan, version).unwrap(); - // And this is OK because it only fails if the above would fail - // (which we know it doesn't) or if the operator is not compatible - // with wildcards, but != is. - let disjunct2 = VersionSpecifier::from_pattern(pep440_rs::Operator::NotEqual, pattern).unwrap(); - MarkerTree::Or(vec![ - MarkerTree::Expression(MarkerExpression::Version { - key: key.clone(), - specifier: disjunct1, - }), - MarkerTree::Expression(MarkerExpression::Version { - key, - specifier: disjunct2, - }), - ]) +/// See [`MarkerTree::contents`] for details. +#[derive(Clone, Eq, Hash, PartialEq, PartialOrd, Ord, Debug)] +pub struct MarkerTreeContents(MarkerTree); + +impl From for MarkerTree { + fn from(value: MarkerTreeContents) -> Self { + value.0 + } +} + +impl AsRef for MarkerTreeContents { + fn as_ref(&self) -> &MarkerTree { + &self.0 + } +} + +impl Serialize for MarkerTreeContents { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&self.to_string()) + } +} + +impl Display for MarkerTreeContents { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + // Normalize all `false` expressions to the same trivially false expression. + if self.0.is_false() { + return write!(f, "python_version < '0'"); + } + + // Write the output in DNF form. + let dnf = self.0.to_dnf(); + let format_conjunction = |conjunction: &Vec| { + conjunction + .iter() + .map(MarkerExpression::to_string) + .collect::>() + .join(" and ") + }; + + let expr = match &dnf[..] { + [conjunction] => format_conjunction(conjunction), + _ => dnf + .iter() + .map(|conjunction| { + if conjunction.len() == 1 { + format_conjunction(conjunction) + } else { + format!("({})", format_conjunction(conjunction)) + } + }) + .collect::>() + .join(" or "), + }; + + f.write_str(&expr) + } } #[cfg(test)] mod test { + use std::ops::Bound; use std::str::FromStr; use insta::assert_snapshot; - - use pep440_rs::VersionSpecifier; + use pep440_rs::Version; + use pubgrub::Range; use uv_normalize::ExtraName; - use crate::marker::{ExtraOperator, MarkerEnvironment, MarkerEnvironmentBuilder}; - use crate::{ - MarkerExpression, MarkerOperator, MarkerTree, MarkerValueString, MarkerValueVersion, - }; + use crate::marker::{MarkerEnvironment, MarkerEnvironmentBuilder}; + use crate::{MarkerExpression, MarkerOperator, MarkerTree, MarkerValueString}; fn parse_err(input: &str) -> String { MarkerTree::from_str(input).unwrap_err().to_string() } + fn m(s: &str) -> MarkerTree { + s.parse().unwrap() + } + fn env37() -> MarkerEnvironment { MarkerEnvironment::try_from(MarkerEnvironmentBuilder { implementation_name: "", @@ -1307,75 +1424,93 @@ mod test { r#"python_version == "2.7" and (sys_platform == "win32" or sys_platform == "linux")"#, ), ]; + for (a, b) in values { - assert_eq!( - MarkerTree::from_str(a).unwrap(), - MarkerTree::from_str(b).unwrap(), - "{a} {b}" - ); + assert_eq!(m(a), m(b), "{a} {b}"); } } #[test] - fn test_marker_negation() { - let neg = |marker_string: &str| -> String { - let tree: MarkerTree = marker_string.parse().unwrap(); - tree.negate().to_string() - }; - - assert_eq!(neg("python_version > '3.6'"), "python_version <= '3.6'"); - assert_eq!(neg("'3.6' < python_version"), "python_version <= '3.6'"); - + fn simplify_python_versions() { assert_eq!( - neg("python_version == '3.6.*'"), - "python_version != '3.6.*'" - ); - assert_eq!( - neg("python_version != '3.6.*'"), - "python_version == '3.6.*'" + m("(extra == 'foo' and sys_platform == 'win32') or extra == 'foo'") + .simplify_extras(&["foo".parse().unwrap()]), + MarkerTree::TRUE ); assert_eq!( - neg("python_version ~= '3.6'"), - "python_version < '3.6' or python_version != '3.*'" - ); - assert_eq!( - neg("'3.6' ~= python_version"), - "python_version < '3.6' or python_version != '3.*'" + m("(python_version <= '3.11' and sys_platform == 'win32') or python_version > '3.11'") + .simplify_python_versions( + Range::from_range_bounds(( + Bound::Excluded(Version::new([3, 12])), + Bound::Unbounded, + )), + Range::from_range_bounds(( + Bound::Excluded(Version::new([3, 12])), + Bound::Unbounded, + )), + ), + MarkerTree::TRUE ); + assert_eq!( - neg("python_version ~= '3.6.2'"), - "python_version < '3.6.2' or python_version != '3.6.*'" + m("python_version < '3.10'") + .simplify_python_versions( + Range::from_range_bounds(( + Bound::Excluded(Version::new([3, 7])), + Bound::Unbounded, + )), + Range::from_range_bounds(( + Bound::Excluded(Version::new([3, 7])), + Bound::Unbounded, + )), + ) + .try_to_string() + .unwrap(), + "python_version < '3.10'" ); - assert_eq!(neg("sys_platform == 'linux'"), "sys_platform != 'linux'"); - assert_eq!(neg("'linux' == sys_platform"), "sys_platform != 'linux'"); - - // ~= is nonsense on string markers. Evaluation always returns false - // in this case, so technically negation would be an expression that - // always returns true. But, as we do with "arbitrary" markers, we - // don't let the negation of nonsense become sensible. - assert_eq!(neg("sys_platform ~= 'linux'"), "sys_platform ~= 'linux'"); - - // As above, arbitrary exprs remain arbitrary. - assert_eq!(neg("'foo' == 'bar'"), "'foo' != 'bar'"); - - // Conjunctions assert_eq!( - neg("os_name == 'bar' and os_name == 'foo'"), - "os_name != 'bar' or os_name != 'foo'" + m("python_version <= '3.12'").simplify_python_versions( + Range::from_range_bounds(( + Bound::Excluded(Version::new([3, 12])), + Bound::Unbounded, + )), + Range::from_range_bounds(( + Bound::Excluded(Version::new([3, 12])), + Bound::Unbounded, + )), + ), + MarkerTree::FALSE ); - // Disjunctions + assert_eq!( - neg("os_name == 'bar' or os_name == 'foo'"), - "os_name != 'bar' and os_name != 'foo'" + m("python_full_version <= '3.12.1'") + .simplify_python_versions( + Range::from_range_bounds(( + Bound::Excluded(Version::new([3, 12])), + Bound::Unbounded, + )), + Range::from_range_bounds(( + Bound::Excluded(Version::new([3, 12])), + Bound::Unbounded, + )), + ) + .try_to_string() + .unwrap(), + "python_full_version <= '3.12.1'" ); + } - // Always true negates to always false! - assert_eq!( - neg("python_version >= '3.6' or python_version < '3.6'"), - "python_version < '3.6' and python_version >= '3.6'" + #[test] + fn release_only() { + assert!(m("python_full_version > '3.10' or python_full_version <= '3.10'").is_true()); + assert!( + m("python_full_version > '3.10' or python_full_version <= '3.10'") + .negate() + .is_false() ); + assert!(m("python_full_version > '3.10' and python_full_version <= '3.10'").is_false()); } #[test] @@ -1489,11 +1624,12 @@ mod test { assert_eq!(warnings, &[]); assert!(!result); + // Meaningless expressions are ignored, so this is always true. let (result, warnings) = MarkerTree::from_str("'3.*' == python_version") .unwrap() .evaluate_collect_warnings(&env37, &[]); assert_eq!(warnings, &[]); - assert!(!result); + assert!(result); } #[test] @@ -1558,7 +1694,9 @@ mod test { #[test] fn test_marker_expression() { assert_eq!( - MarkerExpression::from_str(r#"os_name == "nt""#).unwrap(), + MarkerExpression::from_str(r#"os_name == "nt""#) + .unwrap() + .unwrap(), MarkerExpression::String { key: MarkerValueString::OsName, operator: MarkerOperator::Equal, @@ -1573,30 +1711,11 @@ mod test { MarkerTree::from_str( r#""nt" in os_name and '3.7' >= python_version and python_full_version >= '3.7'"# ) - .unwrap(), - MarkerTree::And(vec![ - MarkerTree::Expression(MarkerExpression::String { - value: "nt".to_string(), - operator: MarkerOperator::Contains, - key: MarkerValueString::OsName, - }), - MarkerTree::Expression(MarkerExpression::Version { - key: MarkerValueVersion::PythonVersion, - specifier: VersionSpecifier::from_pattern( - pep440_rs::Operator::LessThanEqual, - "3.7".parse().unwrap() - ) - .unwrap() - }), - MarkerTree::Expression(MarkerExpression::Version { - key: MarkerValueVersion::PythonFullVersion, - specifier: VersionSpecifier::from_pattern( - pep440_rs::Operator::GreaterThanEqual, - "3.7".parse().unwrap() - ) - .unwrap() - }), - ]) + .unwrap() + .contents() + .unwrap() + .to_string(), + "python_full_version >= '3.7' and python_version <= '3.7' and 'nt' in os_name", ); } @@ -1639,53 +1758,31 @@ mod test { // Given `os_name == "nt" and extra == "dev"`, simplify to `os_name == "nt"`. let markers = MarkerTree::from_str(r#"os_name == "nt" and extra == "dev""#).unwrap(); let simplified = markers.simplify_extras(&[ExtraName::from_str("dev").unwrap()]); - assert_eq!( - simplified, - Some(MarkerTree::Expression(MarkerExpression::String { - key: MarkerValueString::OsName, - operator: MarkerOperator::Equal, - value: "nt".to_string(), - })) - ); + let expected = MarkerTree::from_str(r#"os_name == "nt""#).unwrap(); + assert_eq!(simplified, expected); // Given `os_name == "nt" or extra == "dev"`, remove the marker entirely. let markers = MarkerTree::from_str(r#"os_name == "nt" or extra == "dev""#).unwrap(); let simplified = markers.simplify_extras(&[ExtraName::from_str("dev").unwrap()]); - assert_eq!(simplified, None); + assert_eq!(simplified, MarkerTree::TRUE); // Given `extra == "dev"`, remove the marker entirely. let markers = MarkerTree::from_str(r#"extra == "dev""#).unwrap(); let simplified = markers.simplify_extras(&[ExtraName::from_str("dev").unwrap()]); - assert_eq!(simplified, None); + assert_eq!(simplified, MarkerTree::TRUE); // Given `extra == "dev" and extra == "test"`, simplify to `extra == "test"`. let markers = MarkerTree::from_str(r#"extra == "dev" and extra == "test""#).unwrap(); let simplified = markers.simplify_extras(&[ExtraName::from_str("dev").unwrap()]); - assert_eq!( - simplified, - Some(MarkerTree::Expression(MarkerExpression::Extra { - operator: ExtraOperator::Equal, - name: ExtraName::from_str("test").unwrap(), - })) - ); + let expected = MarkerTree::from_str(r#"extra == "test""#).unwrap(); + assert_eq!(simplified, expected); // Given `os_name == "nt" and extra == "test"`, don't simplify. let markers = MarkerTree::from_str(r#"os_name == "nt" and extra == "test""#).unwrap(); - let simplified = markers.simplify_extras(&[ExtraName::from_str("dev").unwrap()]); - assert_eq!( - simplified, - Some(MarkerTree::And(vec![ - MarkerTree::Expression(MarkerExpression::String { - key: MarkerValueString::OsName, - operator: MarkerOperator::Equal, - value: "nt".to_string(), - }), - MarkerTree::Expression(MarkerExpression::Extra { - operator: ExtraOperator::Equal, - name: ExtraName::from_str("test").unwrap(), - }), - ])) - ); + let simplified = markers + .clone() + .simplify_extras(&[ExtraName::from_str("dev").unwrap()]); + assert_eq!(simplified, markers); // Given `os_name == "nt" and (python_version == "3.7" or extra == "dev")`, simplify to // `os_name == "nt". @@ -1694,14 +1791,8 @@ mod test { ) .unwrap(); let simplified = markers.simplify_extras(&[ExtraName::from_str("dev").unwrap()]); - assert_eq!( - simplified, - Some(MarkerTree::Expression(MarkerExpression::String { - key: MarkerValueString::OsName, - operator: MarkerOperator::Equal, - value: "nt".to_string(), - })) - ); + let expected = MarkerTree::from_str(r#"os_name == "nt""#).unwrap(); + assert_eq!(simplified, expected); // Given `os_name == "nt" or (python_version == "3.7" and extra == "dev")`, simplify to // `os_name == "nt" or python_version == "3.7"`. @@ -1710,23 +1801,537 @@ mod test { ) .unwrap(); let simplified = markers.simplify_extras(&[ExtraName::from_str("dev").unwrap()]); + let expected = + MarkerTree::from_str(r#"os_name == "nt" or python_version == "3.7""#).unwrap(); + assert_eq!(simplified, expected); + } + + #[test] + fn test_marker_simplification() { + assert_simplifies( + "python_version == '3.9' or python_version == '3.9'", + "python_version == '3.9'", + ); + + assert_simplifies( + "python_version < '3.17' or python_version < '3.18'", + "python_version < '3.18'", + ); + + assert_simplifies( + "python_version > '3.17' or python_version > '3.18' or python_version > '3.12'", + "python_version > '3.12'", + ); + + // a quirk of how pubgrub works, but this is considered part of normalization + assert_simplifies( + "python_version > '3.17.post4' or python_version > '3.18.post4'", + "python_version > '3.17'", + ); + + assert_simplifies( + "python_version < '3.17' and python_version < '3.18'", + "python_version < '3.17'", + ); + + assert_simplifies( + "python_version <= '3.18' and python_version == '3.18'", + "python_version == '3.18'", + ); + + assert_simplifies( + "python_version <= '3.18' or python_version == '3.18'", + "python_version <= '3.18'", + ); + + assert_simplifies( + "python_version <= '3.15' or (python_version <= '3.17' and python_version < '3.16')", + "python_version < '3.16'", + ); + + assert_simplifies( + "(python_version > '3.17' or python_version > '3.16') and python_version > '3.15'", + "python_version > '3.16'", + ); + + assert_simplifies( + "(python_version > '3.17' or python_version > '3.16') and python_version > '3.15' and implementation_version == '1'", + "implementation_version == '1' and python_version > '3.16'", + ); + + assert_simplifies( + "('3.17' < python_version or '3.16' < python_version) and '3.15' < python_version and implementation_version == '1'", + "implementation_version == '1' and python_version > '3.16'", + ); + + assert_simplifies("extra == 'a' or extra == 'a'", "extra == 'a'"); + assert_simplifies( + "extra == 'a' and extra == 'a' or extra == 'b'", + "extra == 'a' or extra == 'b'", + ); + + assert!(m("python_version < '3.17' and '3.18' == python_version").is_false()); + + // flatten nested expressions + assert_simplifies( + "((extra == 'a' and extra == 'b') and extra == 'c') and extra == 'b'", + "extra == 'a' and extra == 'b' and extra == 'c'", + ); + + assert_simplifies( + "((extra == 'a' or extra == 'b') or extra == 'c') or extra == 'b'", + "extra == 'a' or extra == 'b' or extra == 'c'", + ); + + // complex expressions + assert_simplifies( + "extra == 'a' or (extra == 'a' and extra == 'b')", + "extra == 'a'", + ); + + assert_simplifies( + "extra == 'a' and (extra == 'a' or extra == 'b')", + "extra == 'a'", + ); + + assert_simplifies( + "(extra == 'a' and (extra == 'a' or extra == 'b')) or extra == 'd'", + "extra == 'a' or extra == 'd'", + ); + + assert_simplifies( + "((extra == 'a' and extra == 'b') or extra == 'c') or extra == 'b'", + "extra == 'b' or extra == 'c'", + ); + + assert_simplifies( + "((extra == 'a' or extra == 'b') and extra == 'c') and extra == 'b'", + "extra == 'b' and extra == 'c'", + ); + + assert_simplifies( + "((extra == 'a' or extra == 'b') and extra == 'c') or extra == 'b'", + "(extra == 'a' and extra == 'c') or extra == 'b'", + ); + + // post-normalization filtering + assert_simplifies( + "(python_version < '3.1' or python_version < '3.2') and (python_version < '3.2' or python_version == '3.3')", + "python_version < '3.2'", + ); + + // normalize out redundant ranges + assert_true("python_version < '3.12.0rc1' or python_version >= '3.12.0rc1'"); + + assert_true( + "extra == 'a' or (python_version < '3.12.0rc1' or python_version >= '3.12.0rc1')", + ); + + assert_simplifies( + "extra == 'a' and (python_version < '3.12.0rc1' or python_version >= '3.12.0rc1')", + "extra == 'a'", + ); + + // normalize `!=` operators + assert_true("python_version != '3.10' or python_version < '3.12'"); + + assert_simplifies( + "python_version != '3.10' or python_version > '3.12'", + "python_version != '3.10'", + ); + + assert_simplifies( + "python_version != '3.8' and python_version < '3.10'", + "python_version < '3.8' or (python_version > '3.8' and python_version < '3.10')", + ); + + assert_simplifies( + "python_version != '3.8' and python_version != '3.9'", + "python_version != '3.8' and python_version != '3.9'", + ); + + // normalize out redundant expressions + assert_true("sys_platform == 'win32' or sys_platform != 'win32'"); + + assert_true("'win32' == sys_platform or sys_platform != 'win32'"); + + assert_true( + "sys_platform == 'win32' or sys_platform == 'win32' or sys_platform != 'win32'", + ); + + assert!(m("sys_platform == 'win32' and sys_platform != 'win32'").is_false()); + } + + #[test] + fn test_marker_negation() { + assert_eq!( + m("python_version > '3.6'").negate(), + m("python_version <= '3.6'") + ); + + assert_eq!( + m("'3.6' < python_version").negate(), + m("python_version <= '3.6'") + ); + + assert_eq!( + m("python_version != '3.6' and os_name == 'Linux'").negate(), + m("python_version == '3.6' or os_name != 'Linux'") + ); + + assert_eq!( + m("python_version == '3.6' and os_name != 'Linux'").negate(), + m("python_version != '3.6' or os_name == 'Linux'") + ); + + assert_eq!( + m("python_version != '3.6.*' and os_name == 'Linux'").negate(), + m("python_version == '3.6.*' or os_name != 'Linux'") + ); + + assert_eq!( + m("python_version == '3.6.*'").negate(), + m("python_version != '3.6.*'") + ); + assert_eq!( + m("python_version != '3.6.*'").negate(), + m("python_version == '3.6.*'") + ); + + assert_eq!( + m("python_version ~= '3.6'").negate(), + m("python_version < '3.6' or python_version != '3.*'") + ); + assert_eq!( + m("'3.6' ~= python_version").negate(), + m("python_version < '3.6' or python_version != '3.*'") + ); + assert_eq!( + m("python_version ~= '3.6.2'").negate(), + m("python_version < '3.6.2' or python_version != '3.6.*'") + ); + + assert_eq!( + m("sys_platform == 'linux'").negate(), + m("sys_platform != 'linux'") + ); + assert_eq!( + m("'linux' == sys_platform").negate(), + m("sys_platform != 'linux'") + ); + + // ~= is nonsense on string markers, so the markers is ignored and always + // evaluates to true. Thus the negation always returns false. + assert_eq!(m("sys_platform ~= 'linux'").negate(), MarkerTree::FALSE); + + // As above, arbitrary exprs remain arbitrary. + assert_eq!(m("'foo' == 'bar'").negate(), MarkerTree::FALSE); + + // Conjunctions assert_eq!( - simplified, - Some(MarkerTree::Or(vec![ - MarkerTree::Expression(MarkerExpression::String { - key: MarkerValueString::OsName, - operator: MarkerOperator::Equal, - value: "nt".to_string(), - }), - MarkerTree::Expression(MarkerExpression::Version { - key: MarkerValueVersion::PythonVersion, - specifier: VersionSpecifier::from_pattern( - pep440_rs::Operator::Equal, - "3.7".parse().unwrap() - ) - .unwrap(), - }), - ])) + m("os_name == 'bar' and os_name == 'foo'").negate(), + m("os_name != 'bar' or os_name != 'foo'") + ); + // Disjunctions + assert_eq!( + m("os_name == 'bar' or os_name == 'foo'").negate(), + m("os_name != 'bar' and os_name != 'foo'") + ); + + // Always true negates to always false! + assert_eq!( + m("python_version >= '3.6' or python_version < '3.6'").negate(), + m("python_version < '3.6' and python_version >= '3.6'") + ); + } + + #[test] + fn test_complex_marker_simplification() { + assert_simplifies("python_version > '3.7'", "python_version > '3.7'"); + + assert_simplifies( + "(python_version <= '3.7' and os_name == 'Linux') or python_version > '3.7'", + "os_name == 'Linux' or python_version > '3.7'", + ); + + assert_simplifies( + "(os_name == 'Linux' and sys_platform == 'win32') \ + or (os_name != 'Linux' and sys_platform == 'win32' and python_version == '3.7') \ + or (os_name != 'Linux' and sys_platform == 'win32' and python_version == '3.8')", + "(os_name == 'Linux' and sys_platform == 'win32') \ + or (python_version == '3.7' and sys_platform == 'win32') \ + or (python_version == '3.8' and sys_platform == 'win32')", ); + + assert_simplifies( + "(os_name == 'Linux' and platform_system == 'win32') + or (os_name == 'Linux' and platform_system == 'win32' and sys_platform == 'a') + or (os_name == 'Linux' and platform_system == 'win32' and sys_platform == 'x') + or (os_name != 'Linux' and platform_system == 'win32' and sys_platform == 'x') + or (os_name == 'Linux' and platform_system != 'win32' and sys_platform == 'x') + or (os_name != 'Linux' and platform_system != 'win32' and sys_platform == 'x')", + "sys_platform == 'x' or (os_name == 'Linux' and platform_system == 'win32')", + ); + + assert_simplifies( + "(implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32')", + "(implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32')" + ); + + assert_simplifies( + "(sys_platform == 'darwin' or sys_platform == 'win32') + and ((implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32'))", + "(implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32')" + ); + + assert_simplifies( + "(sys_platform == 'darwin' or sys_platform == 'win32') + and ((platform_version != '1' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32'))", + "(os_name == 'nt' and platform_version != '1' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32')" + ); + + assert_simplifies( + "(os_name == 'nt' and sys_platform == 'win32') \ + or (os_name != 'nt' and platform_version == '1' and (sys_platform == 'win32' or sys_platform == 'win64'))", + "(platform_version == '1' and sys_platform == 'win32') \ + or (os_name != 'nt' and platform_version == '1' and sys_platform == 'win64') \ + or (os_name == 'nt' and sys_platform == 'win32')" + ); + + assert_simplifies( + "(os_name == 'nt' and sys_platform == 'win32') or (os_name != 'nt' and (sys_platform == 'win32' or sys_platform == 'win64'))", + "(os_name != 'nt' and sys_platform == 'win64') or sys_platform == 'win32'" + ); + } + + #[test] + fn test_requires_python() { + fn simplified(marker: &str) -> MarkerTree { + m(marker).simplify_python_versions( + Range::from_range_bounds((Bound::Included(Version::new([3, 8])), Bound::Unbounded)), + Range::from_range_bounds((Bound::Included(Version::new([3, 8])), Bound::Unbounded)), + ) + } + + assert_eq!(simplified("python_version >= '3.8'"), MarkerTree::TRUE); + assert_eq!( + simplified("python_version >= '3.8' or sys_platform == 'win32'"), + MarkerTree::TRUE + ); + + assert_eq!( + simplified("python_version >= '3.8' and sys_platform == 'win32'"), + m("sys_platform == 'win32'"), + ); + + assert_eq!( + simplified("python_version == '3.8'") + .try_to_string() + .unwrap(), + "python_version == '3.8'" + ); + + assert_eq!( + simplified("python_version <= '3.10'") + .try_to_string() + .unwrap(), + "python_version <= '3.10'" + ); + } + + #[test] + fn test_extra_disjointness() { + assert!(!is_disjoint("extra == 'a'", "python_version == '1'")); + + assert!(!is_disjoint("extra == 'a'", "extra == 'a'")); + assert!(!is_disjoint("extra == 'a'", "extra == 'b'")); + assert!(!is_disjoint("extra == 'b'", "extra == 'a'")); + assert!(!is_disjoint("extra == 'b'", "extra != 'a'")); + assert!(!is_disjoint("extra != 'b'", "extra == 'a'")); + assert!(is_disjoint("extra != 'b'", "extra == 'b'")); + assert!(is_disjoint("extra == 'b'", "extra != 'b'")); + } + + #[test] + fn test_arbitrary_disjointness() { + // `python_version == 'Linux'` is nonsense and ignored, thus the first marker + // is always `true` and not disjoint. + assert!(!is_disjoint( + "python_version == 'Linux'", + "python_version == '3.7.1'" + )); + } + + #[test] + fn test_version_disjointness() { + assert!(!is_disjoint( + "os_name == 'Linux'", + "python_version == '3.7.1'" + )); + + test_version_bounds_disjointness("python_version"); + + assert!(!is_disjoint( + "python_version == '3.7.*'", + "python_version == '3.7.1'" + )); + } + + #[test] + fn test_string_disjointness() { + assert!(!is_disjoint( + "os_name == 'Linux'", + "platform_version == '3.7.1'" + )); + assert!(!is_disjoint( + "implementation_version == '3.7.0'", + "python_version == '3.7.1'" + )); + + // basic version bounds checking should still work with lexicographical comparisons + test_version_bounds_disjointness("platform_version"); + + assert!(is_disjoint("os_name == 'Linux'", "os_name == 'OSX'")); + assert!(is_disjoint("os_name <= 'Linux'", "os_name == 'OSX'")); + + assert!(!is_disjoint( + "os_name in 'OSXLinuxWindows'", + "os_name == 'OSX'" + )); + assert!(!is_disjoint("'OSX' in os_name", "'Linux' in os_name")); + + // complicated `in` intersections are not supported + assert!(!is_disjoint("os_name in 'OSX'", "os_name in 'Linux'")); + assert!(!is_disjoint( + "os_name in 'OSXLinux'", + "os_name == 'Windows'" + )); + + assert!(is_disjoint( + "os_name in 'Windows'", + "os_name not in 'Windows'" + )); + assert!(is_disjoint( + "'Windows' in os_name", + "'Windows' not in os_name" + )); + + assert!(!is_disjoint("'Windows' in os_name", "'Windows' in os_name")); + assert!(!is_disjoint("'Linux' in os_name", "os_name not in 'Linux'")); + assert!(!is_disjoint("'Linux' not in os_name", "os_name in 'Linux'")); + } + + #[test] + fn test_combined_disjointness() { + assert!(!is_disjoint( + "os_name == 'a' and platform_version == '1'", + "os_name == 'a'" + )); + assert!(!is_disjoint( + "os_name == 'a' or platform_version == '1'", + "os_name == 'a'" + )); + + assert!(is_disjoint( + "os_name == 'a' and platform_version == '1'", + "os_name == 'a' and platform_version == '2'" + )); + assert!(is_disjoint( + "os_name == 'a' and platform_version == '1'", + "'2' == platform_version and os_name == 'a'" + )); + assert!(!is_disjoint( + "os_name == 'a' or platform_version == '1'", + "os_name == 'a' or platform_version == '2'" + )); + + assert!(is_disjoint( + "sys_platform == 'darwin' and implementation_name == 'pypy'", + "sys_platform == 'bar' or implementation_name == 'foo'", + )); + assert!(is_disjoint( + "sys_platform == 'bar' or implementation_name == 'foo'", + "sys_platform == 'darwin' and implementation_name == 'pypy'", + )); + } + + #[test] + fn test_arbitrary() { + assert!(m("'wat' == 'wat'").is_true()); + assert!(m("os_name ~= 'wat'").is_true()); + assert!(m("python_version == 'Linux'").is_true()); + assert!(m("os_name ~= 'wat' or 'wat' == 'wat' and python_version == 'Linux'").is_true()); + } + + #[test] + fn test_is_false() { + assert!(m("python_version < '3.10' and python_version >= '3.10'").is_false()); + assert!(m("(python_version < '3.10' and python_version >= '3.10') \ + or (python_version < '3.9' and python_version >= '3.9')",) + .is_false()); + + assert!(!m("python_version < '3.10'").is_false()); + assert!(!m("python_version < '0'").is_false()); + assert!(!m("python_version < '3.10' and python_version >= '3.9'").is_false()); + assert!(!m("python_version < '3.10' or python_version >= '3.11'").is_false()); + } + + fn test_version_bounds_disjointness(version: &str) { + assert!(!is_disjoint( + format!("{version} > '2.7.0'"), + format!("{version} == '3.6.0'") + )); + assert!(!is_disjoint( + format!("{version} >= '3.7.0'"), + format!("{version} == '3.7.1'") + )); + assert!(!is_disjoint( + format!("{version} >= '3.7.0'"), + format!("'3.7.1' == {version}") + )); + + assert!(is_disjoint( + format!("{version} >= '3.7.1'"), + format!("{version} == '3.7.0'") + )); + assert!(is_disjoint( + format!("'3.7.1' <= {version}"), + format!("{version} == '3.7.0'") + )); + + assert!(is_disjoint( + format!("{version} < '3.7.0'"), + format!("{version} == '3.7.0'") + )); + assert!(is_disjoint( + format!("'3.7.0' > {version}"), + format!("{version} == '3.7.0'") + )); + assert!(is_disjoint( + format!("{version} < '3.7.0'"), + format!("{version} == '3.7.1'") + )); + + assert!(is_disjoint( + format!("{version} == '3.7.0'"), + format!("{version} == '3.7.1'") + )); + assert!(is_disjoint( + format!("{version} == '3.7.0'"), + format!("{version} != '3.7.0'") + )); + } + + fn assert_simplifies(left: &str, right: &str) { + assert_eq!(m(left), m(right)); + assert_eq!(m(left).try_to_string().unwrap(), right); + } + + fn assert_true(marker: &str) { + assert!(m(marker).is_true(), "{marker} != true"); + } + + fn is_disjoint(left: impl AsRef, right: impl AsRef) -> bool { + m(left.as_ref()).is_disjoint(&m(right.as_ref())) } } diff --git a/crates/pep508-rs/src/unnamed.rs b/crates/pep508-rs/src/unnamed.rs index 19367936a288..2e07c65accc6 100644 --- a/crates/pep508-rs/src/unnamed.rs +++ b/crates/pep508-rs/src/unnamed.rs @@ -136,7 +136,7 @@ impl Display for UnnamedRequirement { .join(",") )?; } - if let Some(marker) = &self.marker { + if let Some(marker) = self.marker.as_ref().and_then(MarkerTree::contents) { write!(f, " ; {marker}")?; } Ok(()) @@ -172,7 +172,7 @@ fn parse_unnamed_requirement( let marker = if cursor.peek_char() == Some(';') { // Skip past the semicolon cursor.next(); - Some(parse::parse_markers_cursor(cursor, reporter)?) + parse::parse_markers_cursor(cursor, reporter)? } else { None }; diff --git a/crates/pypi-types/src/requirement.rs b/crates/pypi-types/src/requirement.rs index 23162631b938..7a7fa4bd6f26 100644 --- a/crates/pypi-types/src/requirement.rs +++ b/crates/pypi-types/src/requirement.rs @@ -3,10 +3,12 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use distribution_filename::DistExtension; -use pep440_rs::VersionSpecifiers; -use pep508_rs::{MarkerEnvironment, MarkerTree, RequirementOrigin, VerbatimUrl, VersionOrUrl}; +use serde::Serialize; use thiserror::Error; use url::Url; + +use pep440_rs::VersionSpecifiers; +use pep508_rs::{MarkerEnvironment, MarkerTree, RequirementOrigin, VerbatimUrl, VersionOrUrl}; use uv_fs::PortablePathBuf; use uv_git::{GitReference, GitSha, GitUrl}; use uv_normalize::{ExtraName, PackageName}; @@ -37,6 +39,11 @@ pub struct Requirement { pub name: PackageName, #[serde(skip_serializing_if = "Vec::is_empty", default)] pub extras: Vec, + #[serde( + skip_serializing_if = "marker_is_empty", + serialize_with = "serialize_marker", + default + )] pub marker: Option, #[serde(flatten)] pub source: RequirementSource, @@ -44,6 +51,17 @@ pub struct Requirement { pub origin: Option, } +fn marker_is_empty(marker: &Option) -> bool { + marker.as_ref().and_then(MarkerTree::contents).is_none() +} + +fn serialize_marker(marker: &Option, s: S) -> Result +where + S: serde::Serializer, +{ + marker.as_ref().unwrap().contents().unwrap().serialize(s) +} + impl Requirement { /// Returns whether the markers apply for the given environment. /// @@ -238,7 +256,7 @@ impl Display for Requirement { write!(f, " @ {url}")?; } } - if let Some(marker) = &self.marker { + if let Some(marker) = self.marker.as_ref().and_then(MarkerTree::contents) { write!(f, " ; {marker}")?; } Ok(()) diff --git a/crates/requirements-txt/src/snapshots/requirements_txt__test__line-endings-poetry-with-hashes.txt.snap b/crates/requirements-txt/src/snapshots/requirements_txt__test__line-endings-poetry-with-hashes.txt.snap index 1857cd8ed02b..556273fbf294 100644 --- a/crates/requirements-txt/src/snapshots/requirements_txt__test__line-endings-poetry-with-hashes.txt.snap +++ b/crates/requirements-txt/src/snapshots/requirements_txt__test__line-endings-poetry-with-hashes.txt.snap @@ -24,28 +24,7 @@ RequirementsTxt { ), ), marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.8", - }, - }, - ), - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: LessThan, - version: "4.0", - }, - }, - ), - ], - ), + python_version >= '3.8' and python_version < '4.0', ), origin: Some( File( @@ -78,28 +57,7 @@ RequirementsTxt { ), ), marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.8", - }, - }, - ), - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: LessThan, - version: "4", - }, - }, - ), - ], - ), + python_version >= '3.8' and python_version < '4.0', ), origin: Some( File( @@ -132,35 +90,7 @@ RequirementsTxt { ), ), marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.8", - }, - }, - ), - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: LessThan, - version: "4", - }, - }, - ), - Expression( - String { - key: PlatformSystem, - operator: Equal, - value: "Windows", - }, - ), - ], - ), + python_version >= '3.8' and python_version < '4.0' and platform_system == 'Windows', ), origin: Some( File( @@ -193,28 +123,7 @@ RequirementsTxt { ), ), marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.8", - }, - }, - ), - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: LessThan, - version: "4.0", - }, - }, - ), - ], - ), + python_version >= '3.8' and python_version < '4.0', ), origin: Some( File( @@ -248,28 +157,7 @@ RequirementsTxt { ), ), marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.8", - }, - }, - ), - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: LessThan, - version: "4.0", - }, - }, - ), - ], - ), + python_version >= '3.8' and python_version < '4.0', ), origin: Some( File( diff --git a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-poetry-with-hashes.txt.snap b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-poetry-with-hashes.txt.snap index 1857cd8ed02b..556273fbf294 100644 --- a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-poetry-with-hashes.txt.snap +++ b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-poetry-with-hashes.txt.snap @@ -24,28 +24,7 @@ RequirementsTxt { ), ), marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.8", - }, - }, - ), - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: LessThan, - version: "4.0", - }, - }, - ), - ], - ), + python_version >= '3.8' and python_version < '4.0', ), origin: Some( File( @@ -78,28 +57,7 @@ RequirementsTxt { ), ), marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.8", - }, - }, - ), - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: LessThan, - version: "4", - }, - }, - ), - ], - ), + python_version >= '3.8' and python_version < '4.0', ), origin: Some( File( @@ -132,35 +90,7 @@ RequirementsTxt { ), ), marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.8", - }, - }, - ), - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: LessThan, - version: "4", - }, - }, - ), - Expression( - String { - key: PlatformSystem, - operator: Equal, - value: "Windows", - }, - ), - ], - ), + python_version >= '3.8' and python_version < '4.0' and platform_system == 'Windows', ), origin: Some( File( @@ -193,28 +123,7 @@ RequirementsTxt { ), ), marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.8", - }, - }, - ), - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: LessThan, - version: "4.0", - }, - }, - ), - ], - ), + python_version >= '3.8' and python_version < '4.0', ), origin: Some( File( @@ -248,28 +157,7 @@ RequirementsTxt { ), ), marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.8", - }, - }, - ), - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: LessThan, - version: "4.0", - }, - }, - ), - ], - ), + python_version >= '3.8' and python_version < '4.0', ), origin: Some( File( diff --git a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-editable.txt.snap b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-editable.txt.snap index 958a8d6bfb52..ebbce9d1cde8 100644 --- a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-editable.txt.snap +++ b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-editable.txt.snap @@ -168,26 +168,7 @@ RequirementsTxt { ), ], marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.9", - }, - }, - ), - Expression( - String { - key: OsName, - operator: Equal, - value: "posix", - }, - ), - ], - ), + python_version >= '3.9' and os_name == 'posix', ), origin: Some( File( @@ -246,26 +227,7 @@ RequirementsTxt { ), ], marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.9", - }, - }, - ), - Expression( - String { - key: OsName, - operator: Equal, - value: "posix", - }, - ), - ], - ), + python_version >= '3.9' and os_name == 'posix', ), origin: Some( File( @@ -317,26 +279,7 @@ RequirementsTxt { }, extras: [], marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.9", - }, - }, - ), - Expression( - String { - key: OsName, - operator: Equal, - value: "posix", - }, - ), - ], - ), + python_version >= '3.9' and os_name == 'posix', ), origin: Some( File( diff --git a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-windows-editable.txt.snap b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-windows-editable.txt.snap index 97b6ddded02c..d648d594c2e3 100644 --- a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-windows-editable.txt.snap +++ b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-windows-editable.txt.snap @@ -168,26 +168,7 @@ RequirementsTxt { ), ], marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.9", - }, - }, - ), - Expression( - String { - key: OsName, - operator: Equal, - value: "posix", - }, - ), - ], - ), + python_version >= '3.9' and os_name == 'posix', ), origin: Some( File( @@ -246,26 +227,7 @@ RequirementsTxt { ), ], marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.9", - }, - }, - ), - Expression( - String { - key: OsName, - operator: Equal, - value: "posix", - }, - ), - ], - ), + python_version >= '3.9' and os_name == 'posix', ), origin: Some( File( @@ -317,26 +279,7 @@ RequirementsTxt { }, extras: [], marker: Some( - And( - [ - Expression( - Version { - key: PythonVersion, - specifier: VersionSpecifier { - operator: GreaterThanEqual, - version: "3.9", - }, - }, - ), - Expression( - String { - key: OsName, - operator: Equal, - value: "posix", - }, - ), - ], - ), + python_version >= '3.9' and os_name == 'posix', ), origin: Some( File( diff --git a/crates/uv-configuration/src/constraints.rs b/crates/uv-configuration/src/constraints.rs index 4bea7359b10f..9a5d892dc998 100644 --- a/crates/uv-configuration/src/constraints.rs +++ b/crates/uv-configuration/src/constraints.rs @@ -63,8 +63,7 @@ impl Constraints { let Some(extra_expression) = requirement .marker .as_ref() - .and_then(|marker| marker.top_level_extra()) - .cloned() + .and_then(MarkerTree::top_level_extra) else { // Case 2: A non-optional dependency with constraint(s). return Either::Right(Either::Right( @@ -79,7 +78,7 @@ impl Constraints { Either::Right(Either::Left(std::iter::once(requirement).chain( constraints.iter().cloned().map(move |constraint| { // Add the extra to the override marker. - let mut joint_marker = MarkerTree::Expression(extra_expression.clone()); + let mut joint_marker = MarkerTree::expression(extra_expression.clone()); if let Some(marker) = &constraint.marker { joint_marker.and(marker.clone()); } diff --git a/crates/uv-configuration/src/overrides.rs b/crates/uv-configuration/src/overrides.rs index a605b6b20cc6..f0c12aafcfe9 100644 --- a/crates/uv-configuration/src/overrides.rs +++ b/crates/uv-configuration/src/overrides.rs @@ -53,7 +53,7 @@ impl Overrides { let Some(extra_expression) = requirement .marker .as_ref() - .and_then(|marker| marker.top_level_extra()) + .and_then(MarkerTree::top_level_extra) else { // Case 2: A non-optional dependency with override(s). return Either::Right(Either::Right(overrides.iter().map(Cow::Borrowed))); @@ -63,17 +63,19 @@ impl Overrides { // // When the original requirement is an optional dependency, the override(s) need to // be optional for the same extra, otherwise we activate extras that should be inactive. - Either::Right(Either::Left(overrides.iter().map(|override_requirement| { - // Add the extra to the override marker. - let mut joint_marker = MarkerTree::Expression(extra_expression.clone()); - if let Some(marker) = &override_requirement.marker { - joint_marker.and(marker.clone()); - } - Cow::Owned(Requirement { - marker: Some(joint_marker.clone()), - ..override_requirement.clone() - }) - }))) + Either::Right(Either::Left(overrides.iter().map( + move |override_requirement| { + // Add the extra to the override marker. + let mut joint_marker = MarkerTree::expression(extra_expression.clone()); + if let Some(marker) = &override_requirement.marker { + joint_marker.and(marker.clone()); + } + Cow::Owned(Requirement { + marker: Some(joint_marker.clone()), + ..override_requirement.clone() + }) + }, + ))) }) } } diff --git a/crates/uv-pubgrub/Cargo.toml b/crates/uv-pubgrub/Cargo.toml new file mode 100644 index 000000000000..b8e96fd33399 --- /dev/null +++ b/crates/uv-pubgrub/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "uv-pubgrub" +version = "0.0.1" +edition = "2021" +description = "Common uv pubgrub types." + +[lints] +workspace = true + +[dependencies] +pep440_rs = { workspace = true } + +itertools = { workspace = true } +thiserror = { workspace = true } +pubgrub = { workspace = true } diff --git a/crates/uv-resolver/src/pubgrub/specifier.rs b/crates/uv-pubgrub/src/lib.rs similarity index 97% rename from crates/uv-resolver/src/pubgrub/specifier.rs rename to crates/uv-pubgrub/src/lib.rs index 15b969802bb8..cc45f5232ed4 100644 --- a/crates/uv-resolver/src/pubgrub/specifier.rs +++ b/crates/uv-pubgrub/src/lib.rs @@ -17,7 +17,7 @@ pub enum PubGrubSpecifierError { pub struct PubGrubSpecifier(Range); impl PubGrubSpecifier { - pub(crate) fn iter(&self) -> impl Iterator, &Bound)> { + pub fn iter(&self) -> impl Iterator, &Bound)> { self.0.iter() } } @@ -38,7 +38,7 @@ impl From for Range { impl PubGrubSpecifier { /// Convert [`VersionSpecifiers`] to a PubGrub-compatible version range, using PEP 440 /// semantics. - pub(crate) fn from_pep440_specifiers( + pub fn from_pep440_specifiers( specifiers: &VersionSpecifiers, ) -> Result { let range = specifiers @@ -52,7 +52,7 @@ impl PubGrubSpecifier { /// Convert the [`VersionSpecifier`] to a PubGrub-compatible version range, using PEP 440 /// semantics. - pub(crate) fn from_pep440_specifier( + pub fn from_pep440_specifier( specifier: &VersionSpecifier, ) -> Result { let ranges = match specifier.operator() { @@ -159,7 +159,7 @@ impl PubGrubSpecifier { /// is allowed for projects that declare `requires-python = ">3.13"`. /// /// See: - pub(crate) fn from_release_specifiers( + pub fn from_release_specifiers( specifiers: &VersionSpecifiers, ) -> Result { let range = specifiers @@ -182,7 +182,7 @@ impl PubGrubSpecifier { /// is allowed for projects that declare `requires-python = ">3.13"`. /// /// See: - pub(crate) fn from_release_specifier( + pub fn from_release_specifier( specifier: &VersionSpecifier, ) -> Result { let ranges = match specifier.operator() { diff --git a/crates/uv-requirements/src/source_tree.rs b/crates/uv-requirements/src/source_tree.rs index ee4cb2e9bd32..0f223f418a8e 100644 --- a/crates/uv-requirements/src/source_tree.rs +++ b/crates/uv-requirements/src/source_tree.rs @@ -106,7 +106,8 @@ impl<'a, Context: BuildContext> SourceTreeResolver<'a, Context> { origin: Some(origin.clone()), marker: requirement .marker - .and_then(|marker| marker.simplify_extras(extras)), + .map(|marker| marker.simplify_extras(extras)) + .filter(|marker| !marker.is_true()), ..requirement }) .collect(); @@ -129,7 +130,8 @@ impl<'a, Context: BuildContext> SourceTreeResolver<'a, Context> { requirement.marker = requirement .marker .take() - .and_then(|marker| marker.simplify_extras(&recursive.extras)); + .map(|marker| marker.simplify_extras(&recursive.extras)) + .filter(|marker| !marker.is_true()); } } diff --git a/crates/uv-resolver/Cargo.toml b/crates/uv-resolver/Cargo.toml index cad6be07dd43..d67b9d24981d 100644 --- a/crates/uv-resolver/Cargo.toml +++ b/crates/uv-resolver/Cargo.toml @@ -29,6 +29,7 @@ uv-distribution = { workspace = true } uv-fs = { workspace = true, features = ["serde"] } uv-git = { workspace = true } uv-normalize = { workspace = true } +uv-pubgrub = { workspace = true } uv-python = { workspace = true } uv-types = { workspace = true } uv-warnings = { workspace = true } diff --git a/crates/uv-resolver/src/error.rs b/crates/uv-resolver/src/error.rs index e8676befb456..d57a1793519d 100644 --- a/crates/uv-resolver/src/error.rs +++ b/crates/uv-resolver/src/error.rs @@ -7,7 +7,7 @@ use rustc_hash::FxHashMap; use distribution_types::{BuiltDist, IndexLocations, InstalledDist, SourceDist}; use pep440_rs::Version; -use pep508_rs::{MarkerTree, Requirement}; +use pep508_rs::MarkerTree; use uv_normalize::PackageName; use crate::candidate_selector::CandidateSelector; @@ -19,9 +19,6 @@ use crate::resolver::{IncompletePackage, ResolverMarkers, UnavailablePackage, Un #[derive(Debug, thiserror::Error)] pub enum ResolveError { - #[error("Failed to find a version of `{0}` that satisfies the requirement")] - NotFound(Requirement), - #[error(transparent)] Client(#[from] uv_client::Error), @@ -49,7 +46,7 @@ pub enum ResolveError { #[error("Requirements contain conflicting URLs for package `{0}`:\n- {}", _1.join("\n- "))] ConflictingUrlsUniversal(PackageName, Vec), - #[error("Requirements contain conflicting URLs for package `{package_name}` in split `{fork_markers}`:\n- {}", urls.join("\n- "))] + #[error("Requirements contain conflicting URLs for package `{package_name}` in split `{fork_markers:?}`:\n- {}", urls.join("\n- "))] ConflictingUrlsFork { package_name: PackageName, urls: Vec, @@ -137,7 +134,7 @@ impl NoSolutionError { "No solution found when resolving dependencies:".to_string() } ResolverMarkers::Fork(markers) => { - format!("No solution found when resolving dependencies for split ({markers}):") + format!("No solution found when resolving dependencies for split ({markers:?}):") } } } diff --git a/crates/uv-resolver/src/lock.rs b/crates/uv-resolver/src/lock.rs index 82ae1a7b1376..a5ceb99a3424 100644 --- a/crates/uv-resolver/src/lock.rs +++ b/crates/uv-resolver/src/lock.rs @@ -477,8 +477,12 @@ impl Lock { doc.insert("requires-python", value(requires_python.to_string())); } if let Some(ref fork_markers) = self.fork_markers { - let fork_markers = - each_element_on_its_line_array(fork_markers.iter().map(ToString::to_string)); + let fork_markers = each_element_on_its_line_array( + fork_markers + .iter() + .filter_map(MarkerTree::contents) + .map(|marker| marker.to_string()), + ); doc.insert("environment-markers", value(fork_markers)); } @@ -1018,10 +1022,11 @@ impl Package { for dep in deps { if let Some(mut dep) = dep.to_requirement(workspace_root, &mut dependency_extras)? { // Add back the extra marker expression. - let marker = MarkerTree::Expression(MarkerExpression::Extra { + let marker = MarkerTree::expression(MarkerExpression::Extra { operator: ExtraOperator::Equal, name: extra.clone(), }); + match dep.marker { Some(ref mut tree) => tree.and(marker), None => dep.marker = Some(marker), @@ -1079,8 +1084,12 @@ impl Package { self.id.to_toml(None, &mut table); if let Some(ref fork_markers) = self.fork_markers { - let wheels = - each_element_on_its_line_array(fork_markers.iter().map(ToString::to_string)); + let wheels = each_element_on_its_line_array( + fork_markers + .iter() + .filter_map(MarkerTree::contents) + .map(|marker| marker.to_string()), + ); table.insert("environment-markers", value(wheels)); } @@ -2288,7 +2297,7 @@ impl Dependency { .collect::(); table.insert("extra", value(extra_array)); } - if let Some(ref marker) = self.marker { + if let Some(marker) = self.marker.as_ref().and_then(MarkerTree::contents) { table.insert("marker", value(marker.to_string())); } diff --git a/crates/uv-resolver/src/marker.rs b/crates/uv-resolver/src/marker.rs index 64107534e221..c92ea0d095da 100644 --- a/crates/uv-resolver/src/marker.rs +++ b/crates/uv-resolver/src/marker.rs @@ -1,1136 +1,52 @@ -#![allow(clippy::enum_glob_use, clippy::single_match_else)] +use pep508_rs::{MarkerTree, MarkerTreeKind, MarkerValueVersion}; -use std::ops::Bound::{self, *}; -use std::ops::RangeBounds; - -use pubgrub::Range as PubGrubRange; -use rustc_hash::FxHashMap; - -use pep440_rs::{Version, VersionSpecifier}; -use pep508_rs::{ - ExtraName, ExtraOperator, MarkerExpression, MarkerOperator, MarkerTree, MarkerValueString, - MarkerValueVersion, -}; - -use crate::pubgrub::PubGrubSpecifier; use crate::RequiresPythonBound; -/// Returns true when it can be proven that the given marker expression -/// evaluates to true for precisely zero marker environments. -/// -/// When this returns false, it *may* be the case that is evaluates to -/// true for precisely zero marker environments. That is, this routine -/// never has false positives but may have false negatives. -pub(crate) fn is_definitively_empty_set(tree: &MarkerTree) -> bool { - match *tree { - // A conjunction is definitively empty when it is known that - // *any* two of its conjuncts are disjoint. Since this would - // imply that the entire conjunction could never be true. - MarkerTree::And(ref trees) => { - // Since this is quadratic in the case where the - // expression is *not* empty, we limit ourselves - // to a small number of conjuncts. In practice, - // this should hopefully cover most cases. - if trees.len() > 10 { - return false; - } - for (i, tree1) in trees.iter().enumerate() { - for tree2 in &trees[i..] { - if is_disjoint(tree1, tree2) { - return true; - } - } - } - false - } - // A disjunction is definitively empty when all of its - // disjuncts are definitively empty. - MarkerTree::Or(ref trees) => trees.iter().all(is_definitively_empty_set), - // An "arbitrary" expression is always false, so we - // at least know it is definitively empty. - MarkerTree::Expression(MarkerExpression::Arbitrary { .. }) => true, - // Can't really do much with a single expression. There are maybe - // trivial cases we could check (like `python_version < '0'`), but I'm - // not sure it's worth doing? - MarkerTree::Expression(_) => false, - } -} - -/// Returns `true` if there is no environment in which both marker trees can both apply, i.e. -/// the expression `first and second` is always false. -pub(crate) fn is_disjoint(first: &MarkerTree, second: &MarkerTree) -> bool { - let (expr1, expr2) = match (first, second) { - (MarkerTree::Expression(expr1), MarkerTree::Expression(expr2)) => (expr1, expr2), - // `Or` expressions are disjoint if all clauses are disjoint. - (other, MarkerTree::Or(exprs)) | (MarkerTree::Or(exprs), other) => { - return exprs.iter().all(|tree1| is_disjoint(tree1, other)) - } - // `And` expressions are disjoint if any clause is disjoint. - (other, MarkerTree::And(exprs)) | (MarkerTree::And(exprs), other) => { - return exprs.iter().any(|tree1| is_disjoint(tree1, other)); - } - }; - - match (expr1, expr2) { - // `Arbitrary` expressions always evaluate to `false`, and are thus always disjoint. - (MarkerExpression::Arbitrary { .. }, _) | (_, MarkerExpression::Arbitrary { .. }) => true, - (MarkerExpression::Version { .. }, expr2) => version_is_disjoint(expr1, expr2), - (MarkerExpression::String { .. }, expr2) => string_is_disjoint(expr1, expr2), - (MarkerExpression::Extra { operator, name }, expr2) => { - extra_is_disjoint(operator, name, expr2) - } - } -} - -/// Returns `true` if this string expression does not intersect with the given expression. -fn string_is_disjoint(this: &MarkerExpression, other: &MarkerExpression) -> bool { - use MarkerOperator::*; - - // Extract the normalized string expressions. - let Some((key, operator, value)) = extract_string_expression(this) else { - return false; - }; - let Some((key2, operator2, value2)) = extract_string_expression(other) else { - return false; - }; - - // Distinct string expressions are not disjoint. - if key != key2 { - return false; - } - - match (operator, operator2) { - // The only disjoint expressions involving strict inequality are `key != value` and `key == value`. - (NotEqual, Equal) | (Equal, NotEqual) => return value == value2, - (NotEqual, _) | (_, NotEqual) => return false, - // Similarly for `in` and `not in`. - (In, NotIn) | (NotIn, In) => return value == value2, - (In | NotIn, _) | (_, In | NotIn) => return false, - // As well as the inverse. - (Contains, NotContains) | (NotContains, Contains) => return value == value2, - (Contains | NotContains, _) | (_, Contains | NotContains) => return false, - _ => {} - } - - let bounds = string_bounds(value, operator); - let bounds2 = string_bounds(value2, operator2); - - // Make sure the ranges do not intersect. - if range_exists::<&str>(&bounds2.start_bound(), &bounds.end_bound()) - && range_exists::<&str>(&bounds.start_bound(), &bounds2.end_bound()) - { - return false; - } - - true -} - -pub(crate) fn python_range(expr: &MarkerExpression) -> Option> { - match expr { - MarkerExpression::Version { - key: MarkerValueVersion::PythonFullVersion, - specifier, - } => { - // Simplify using PEP 440 semantics. - let specifier = PubGrubSpecifier::from_pep440_specifier(specifier).ok()?; - - // Convert to PubGrub. - Some(PubGrubRange::from(specifier)) - } - MarkerExpression::Version { - key: MarkerValueVersion::PythonVersion, - specifier, - } => { - // Simplify using release-only semantics, since `python_version` is always `major.minor`. - let specifier = PubGrubSpecifier::from_release_specifier(specifier).ok()?; - - // Convert to PubGrub. - Some(PubGrubRange::from(specifier)) - } - _ => None, - } -} - /// Returns the minimum Python version that can satisfy the [`MarkerTree`], if it's constrained. -pub(crate) fn requires_python_marker(tree: &MarkerTree) -> Option { - match tree { - MarkerTree::Expression(expr) => { - // Extract the supported Python range. - let range = python_range(expr)?; - - // Extract the lower bound. - let (lower, _) = range.iter().next()?; - Some(RequiresPythonBound::new(lower.clone())) - } - MarkerTree::And(trees) => { - // Take the maximum of any nested expressions. - trees.iter().filter_map(requires_python_marker).max() - } - MarkerTree::Or(trees) => { - // If all subtrees have a bound, take the minimum. - let mut min_version = None; - for tree in trees { - let version = requires_python_marker(tree)?; - min_version = match min_version { - Some(min_version) => Some(std::cmp::min(min_version, version)), - None => Some(version), - }; - } - min_version - } - } -} - -/// Normalizes this marker tree. -/// -/// This function does a number of operations to normalize a marker tree recursively: -/// - Sort and flatten all nested expressions. -/// - Simplify expressions. This includes combining overlapping version ranges, removing duplicate -/// expressions, and removing redundant expressions. -/// - Normalize the order of version expressions to the form ` ` -/// (i.e., not the reverse). -/// -/// This is useful in cases where creating conjunctions or disjunctions might occur in a non-deterministic -/// order. This routine will attempt to erase the distinction created by such a construction. -/// -/// This returns `None` in the case of normalization turning a `MarkerTree` -/// into an expression that is known to match all possible marker -/// environments. Note though that this may return an empty disjunction, e.g., -/// `MarkerTree::Or(vec![])`, which matches precisely zero marker environments. -pub(crate) fn normalize( - mut tree: MarkerTree, - bound: Option<&RequiresPythonBound>, -) -> Option { - // Filter out redundant expressions that show up before and after normalization. - filter_all(&mut tree); - let mut tree = normalize_all(tree, bound)?; - filter_all(&mut tree); - Some(tree) -} - -/// Normalize the marker tree recursively. -pub(crate) fn normalize_all( - tree: MarkerTree, - bound: Option<&RequiresPythonBound>, -) -> Option { - match tree { - MarkerTree::And(trees) => { - let mut reduced = Vec::new(); - let mut versions: FxHashMap<_, Vec<_>> = FxHashMap::default(); - - for subtree in trees { - // Normalize nested expressions as much as possible first. - // - // If the expression gets normalized out (e.g., `version < '3.8' and version >= '3.8'`), - // omit it. - let Some(subtree) = normalize_all(subtree, bound) else { - continue; - }; - - match subtree { - MarkerTree::Or(_) => reduced.push(subtree), - // Flatten nested `And` expressions. - MarkerTree::And(subtrees) => reduced.extend(subtrees), - // Extract expressions we may be able to simplify more. - MarkerTree::Expression(ref expr) => { - if let Some((key, range)) = keyed_range(expr) { - versions.entry(key.clone()).or_default().push(range); - continue; +pub(crate) fn requires_python(tree: &MarkerTree) -> Option { + fn collect_python_markers(tree: &MarkerTree, markers: &mut Vec) { + match tree.kind() { + MarkerTreeKind::True | MarkerTreeKind::False => {} + MarkerTreeKind::Version(marker) => match marker.key() { + MarkerValueVersion::PythonVersion | MarkerValueVersion::PythonFullVersion => { + for (range, tree) in marker.edges() { + if !tree.is_false() { + // Extract the lower bound. + let (lower, _) = range.iter().next().unwrap(); + markers.push(RequiresPythonBound::new(lower.clone())); } - - reduced.push(subtree); } } - } - - // Combine version ranges. - simplify_ranges(&mut reduced, versions, |ranges| { - ranges - .iter() - .fold(PubGrubRange::full(), |acc, range| acc.intersection(range)) - }); - - reduced.sort(); - reduced.dedup(); - - match reduced.len() { - 0 => None, - 1 => Some(reduced.remove(0)), - _ => Some(MarkerTree::And(reduced)), - } - } - - MarkerTree::Or(trees) => { - let mut reduced = Vec::new(); - let mut versions: FxHashMap<_, Vec<_>> = FxHashMap::default(); - - for subtree in trees { - // Normalize nested expressions as much as possible first. - // - // If the expression gets normalized out (e.g., `version < '3.8' and version >= '3.8'`), - // return `true`. - let subtree = normalize_all(subtree, bound)?; - - match subtree { - MarkerTree::And(_) => reduced.push(subtree), - // Flatten nested `Or` expressions. - MarkerTree::Or(subtrees) => { - for subtree in subtrees { - match subtree { - // Look one level deeper for expressions to simplify, as - // `normalize_all` can return `MarkerTree::Or` for some expressions. - MarkerTree::Expression(ref expr) => { - if let Some((key, range)) = keyed_range(expr) { - versions.entry(key.clone()).or_default().push(range); - continue; - } - reduced.push(subtree); - } - _ => reduced.push(subtree), - } - } - } - // Extract expressions we may be able to simplify more. - MarkerTree::Expression(ref expr) => { - if let Some((key, range)) = keyed_range(expr) { - versions.entry(key.clone()).or_default().push(range); - continue; - } - - reduced.push(subtree); + MarkerValueVersion::ImplementationVersion => { + for (_, tree) in marker.edges() { + collect_python_markers(&tree, markers); } } - } - - // Combine version ranges. - simplify_ranges(&mut reduced, versions, |ranges| { - ranges - .iter() - .fold(PubGrubRange::empty(), |acc, range| acc.union(range)) - }); - - reduced.sort(); - reduced.dedup(); - - // If the reduced trees contain complementary terms (e.g., `sys_platform == 'linux' or sys_platform != 'linux'`), - // the expression is always true and can be removed. - if contains_complements(&reduced) { - return None; - } - - match reduced.len() { - 0 => None, - 1 => Some(reduced.remove(0)), - _ => Some(MarkerTree::Or(reduced)), - } - } - - // If the marker is redundant given the supported Python range, remove it. - // - // For example, `python_version >= '3.7'` is redundant with `requires-python: '>=3.8'`. - MarkerTree::Expression(expr) - if bound.is_some_and(|bound| { - python_range(&expr).is_some_and(|supported_range| { - PubGrubRange::from(bound.clone()).subset_of(&supported_range) - }) - }) => - { - None - } - - MarkerTree::Expression(ref expr) => { - if let Some((key, range)) = keyed_range(expr) { - // If multiple terms are required to express the range, flatten them into an `Or` - // expression. - let mut iter = range.iter().flat_map(VersionSpecifier::from_bounds); - let first = iter.next().unwrap(); - if let Some(second) = iter.next() { - Some(MarkerTree::Or( - std::iter::once(first) - .chain(std::iter::once(second)) - .chain(iter) - .map(|specifier| { - MarkerTree::Expression(MarkerExpression::Version { - key: key.clone(), - specifier, - }) - }) - .collect(), - )) - } else { - Some(MarkerTree::Expression(MarkerExpression::Version { - key: key.clone(), - specifier: first, - })) + }, + MarkerTreeKind::String(marker) => { + for (_, tree) in marker.children() { + collect_python_markers(&tree, markers); } - } else { - Some(tree) - } - } - } -} - -/// Removes redundant expressions from the tree recursively. -/// -/// This function does not attempt to flatten or clean the tree and may leave it in a denormalized state. -pub(crate) fn filter_all(tree: &mut MarkerTree) { - match tree { - MarkerTree::And(trees) => { - for subtree in &mut *trees { - filter_all(subtree); } - - for conjunct in collect_expressions(trees) { - // Filter out redundant disjunctions (by the Absorption Law). - trees.retain_mut(|tree| !filter_disjunctions(tree, &conjunct)); - - // Filter out redundant expressions in this conjunction. - for tree in &mut *trees { - filter_conjunct_exprs(tree, &conjunct); - } - } - } - - MarkerTree::Or(trees) => { - for subtree in &mut *trees { - filter_all(subtree); - } - - for disjunct in collect_expressions(trees) { - // Filter out redundant conjunctions (by the Absorption Law). - trees.retain_mut(|tree| !filter_conjunctions(tree, &disjunct)); - - // Filter out redundant expressions in this disjunction. - for tree in &mut *trees { - filter_disjunct_exprs(tree, &disjunct); - } - } - } - - MarkerTree::Expression(_) => {} - } -} - -/// Collects all direct leaf expressions from a list of marker trees. -/// -/// The expressions that are directly present within a conjunction or disjunction -/// can be used to filter out redundant expressions recursively in sibling trees. Importantly, -/// this function only returns expressions present at the top-level and does not search -/// recursively. -fn collect_expressions(trees: &[MarkerTree]) -> Vec { - trees - .iter() - .filter_map(|tree| match tree { - MarkerTree::Expression(expr) => Some(expr.clone()), - _ => None, - }) - .collect() -} - -/// Filters out the given expression recursively from any disjunctions in a marker tree. -/// -/// If a given expression is directly present in an outer disjunction, the tree can be satisfied -/// by the singular expression and thus we can filter it out from any disjunctions in sibling trees. -/// For example, `a or (b or a)` can be simplified to `a or b`. -fn filter_disjunct_exprs(tree: &mut MarkerTree, disjunct: &MarkerExpression) { - match tree { - MarkerTree::Or(trees) => { - trees.retain_mut(|tree| match tree { - MarkerTree::Expression(expr) => expr != disjunct, - _ => { - filter_disjunct_exprs(tree, disjunct); - true + MarkerTreeKind::In(marker) => { + for (_, tree) in marker.children() { + collect_python_markers(&tree, markers); } - }); - } - - MarkerTree::And(trees) => { - for tree in trees { - filter_disjunct_exprs(tree, disjunct); } - } - - MarkerTree::Expression(_) => {} - } -} - -/// Filters out the given expression recursively from any conjunctions in a marker tree. -/// -/// If a given expression is directly present in an outer conjunction, we can assume it is -/// true in all sibling trees and thus filter it out from any nested conjunctions. For example, -/// `a and (b and a)` can be simplified to `a and b`. -fn filter_conjunct_exprs(tree: &mut MarkerTree, conjunct: &MarkerExpression) { - match tree { - MarkerTree::And(trees) => { - trees.retain_mut(|tree| match tree { - MarkerTree::Expression(expr) => expr != conjunct, - _ => { - filter_conjunct_exprs(tree, conjunct); - true + MarkerTreeKind::Contains(marker) => { + for (_, tree) in marker.children() { + collect_python_markers(&tree, markers); } - }); - } - - MarkerTree::Or(trees) => { - for tree in trees { - filter_conjunct_exprs(tree, conjunct); } - } - - MarkerTree::Expression(_) => {} - } -} - -/// Filters out disjunctions recursively from a marker tree that contain the given expression. -/// -/// If a given expression is directly present in an outer conjunction, we can assume it is -/// true in all sibling trees and thus filter out any disjunctions that contain it. For example, -/// `a and (b or a)` can be simplified to `a`. -/// -/// Returns `true` if the outer tree should be removed. -fn filter_disjunctions(tree: &mut MarkerTree, conjunct: &MarkerExpression) -> bool { - let disjunction = match tree { - MarkerTree::Or(trees) => trees, - // Recurse because the tree might not have been flattened. - MarkerTree::And(trees) => { - trees.retain_mut(|tree| !filter_disjunctions(tree, conjunct)); - return trees.is_empty(); - } - MarkerTree::Expression(_) => return false, - }; - - let mut filter = Vec::new(); - for (i, tree) in disjunction.iter_mut().enumerate() { - match tree { - // Found a matching expression, filter out this entire tree. - MarkerTree::Expression(expr) if expr == conjunct => { - return true; - } - // Filter subtrees. - MarkerTree::Or(_) => { - if filter_disjunctions(tree, conjunct) { - filter.push(i); + MarkerTreeKind::Extra(marker) => { + for (_, tree) in marker.children() { + collect_python_markers(&tree, markers); } } - _ => {} } } - for i in filter.into_iter().rev() { - disjunction.remove(i); - } - - false -} - -/// Filters out conjunctions recursively from a marker tree that contain the given expression. -/// -/// If a given expression is directly present in an outer disjunction, the tree can be satisfied -/// by the singular expression and thus we can filter out any conjunctions in sibling trees that -/// contain it. For example, `a or (b and a)` can be simplified to `a`. -/// -/// Returns `true` if the outer tree should be removed. -fn filter_conjunctions(tree: &mut MarkerTree, disjunct: &MarkerExpression) -> bool { - let conjunction = match tree { - MarkerTree::And(trees) => trees, - // Recurse because the tree might not have been flattened. - MarkerTree::Or(trees) => { - trees.retain_mut(|tree| !filter_conjunctions(tree, disjunct)); - return trees.is_empty(); - } - MarkerTree::Expression(_) => return false, - }; - - let mut filter = Vec::new(); - for (i, tree) in conjunction.iter_mut().enumerate() { - match tree { - // Found a matching expression, filter out this entire tree. - MarkerTree::Expression(expr) if expr == disjunct => { - return true; - } - // Filter subtrees. - MarkerTree::And(_) => { - if filter_conjunctions(tree, disjunct) { - filter.push(i); - } - } - _ => {} - } - } - - for i in filter.into_iter().rev() { - conjunction.remove(i); - } - - false -} - -/// Simplify version expressions. -fn simplify_ranges( - reduced: &mut Vec, - versions: FxHashMap>>, - combine: impl Fn(&Vec>) -> PubGrubRange, -) { - for (key, ranges) in versions { - let simplified = combine(&ranges); - - // If this is a meaningless expressions with no valid intersection, add back - // the original ranges. - if simplified.is_empty() { - for specifier in ranges - .iter() - .flat_map(PubGrubRange::iter) - .flat_map(VersionSpecifier::from_bounds) - { - reduced.push(MarkerTree::Expression(MarkerExpression::Version { - specifier, - key: key.clone(), - })); - } - } - - // Add back the simplified segments. - for specifier in simplified.iter().flat_map(VersionSpecifier::from_bounds) { - reduced.push(MarkerTree::Expression(MarkerExpression::Version { - key: key.clone(), - specifier, - })); - } - } -} - -/// Extracts the key, value, and string from a string expression, reversing the operator if necessary. -fn extract_string_expression( - expr: &MarkerExpression, -) -> Option<(&MarkerValueString, MarkerOperator, &str)> { - match expr { - MarkerExpression::String { - key, - operator, - value, - } => Some((key, *operator, value)), - _ => None, - } -} - -/// Returns `true` if the range formed by an upper and lower bound is non-empty. -fn range_exists(lower: &Bound, upper: &Bound) -> bool { - match (lower, upper) { - (Included(s), Included(e)) => s <= e, - (Included(s), Excluded(e)) => s < e, - (Excluded(s), Included(e)) => s < e, - (Excluded(s), Excluded(e)) => s < e, - (Unbounded, _) | (_, Unbounded) => true, - } -} - -/// Returns the lower and upper bounds of a string inequality. -/// -/// Panics if called on the `!=`, `in`, or `not in` operators. -fn string_bounds(value: &str, operator: MarkerOperator) -> (Bound<&str>, Bound<&str>) { - use MarkerOperator::*; - match operator { - Equal => (Included(value), Included(value)), - // TODO: not really sure what this means for strings - TildeEqual => (Included(value), Included(value)), - GreaterThan => (Excluded(value), Unbounded), - GreaterEqual => (Included(value), Unbounded), - LessThan => (Unbounded, Excluded(value)), - LessEqual => (Unbounded, Included(value)), - NotEqual | In | NotIn | Contains | NotContains => unreachable!(), - } -} - -/// Returns `true` if this extra expression does not intersect with the given expression. -fn extra_is_disjoint(operator: &ExtraOperator, name: &ExtraName, other: &MarkerExpression) -> bool { - let MarkerExpression::Extra { - operator: operator2, - name: name2, - } = other - else { - return false; - }; - - // extra expressions are only disjoint if they require existence and non-existence of the same extra - operator != operator2 && name == name2 -} - -/// Returns `true` if this version expression does not intersect with the given expression. -fn version_is_disjoint(this: &MarkerExpression, other: &MarkerExpression) -> bool { - let Some((key, range)) = keyed_range(this) else { - return false; - }; - - // if this is not a version expression it may intersect - let Some((key2, range2)) = keyed_range(other) else { - return false; - }; - - // distinct version expressions are not disjoint - if key != key2 { - return false; - } - - // there is no version that is contained in both ranges - range.is_disjoint(&range2) -} - -/// Return `true` if the tree contains complementary terms (e.g., `sys_platform == 'linux' or sys_platform != 'linux'`). -fn contains_complements(trees: &[MarkerTree]) -> bool { - let mut terms = FxHashMap::default(); - for tree in trees { - let MarkerTree::Expression(MarkerExpression::String { - key, - operator, - value, - }) = tree - else { - continue; - }; - - match operator { - MarkerOperator::Equal => { - if let Some(MarkerOperator::NotEqual) = terms.insert((key, value), operator) { - return true; - } - } - MarkerOperator::NotEqual => { - if let Some(MarkerOperator::Equal) = terms.insert((key, value), operator) { - return true; - } - } - _ => {} - } - } - false -} - -/// Returns the key and version range for a version expression. -fn keyed_range(expr: &MarkerExpression) -> Option<(&MarkerValueVersion, PubGrubRange)> { - let (key, specifier) = match expr { - MarkerExpression::Version { key, specifier } => (key, specifier.clone()), - _ => return None, - }; - - // Simplify using either PEP 440 or release-only semantics. - let pubgrub_specifier = match expr { - MarkerExpression::Version { - key: MarkerValueVersion::PythonVersion, - .. - } => PubGrubSpecifier::from_release_specifier(&specifier).ok()?, - MarkerExpression::Version { - key: MarkerValueVersion::PythonFullVersion, - .. - } => PubGrubSpecifier::from_pep440_specifier(&specifier).ok()?, - _ => return None, - }; - - Some((key, pubgrub_specifier.into())) -} - -#[cfg(test)] -mod tests { - use pep508_rs::TracingReporter; - - use super::*; - - #[test] - fn simplify() { - assert_marker_equal( - "python_version == '3.9' or python_version == '3.9'", - "python_version == '3.9'", - ); - - assert_marker_equal( - "python_version < '3.17' or python_version < '3.18'", - "python_version < '3.18'", - ); - - assert_marker_equal( - "python_version > '3.17' or python_version > '3.18' or python_version > '3.12'", - "python_version > '3.12'", - ); - - // a quirk of how pubgrub works, but this is considered part of normalization - assert_marker_equal( - "python_version > '3.17.post4' or python_version > '3.18.post4'", - "python_version > '3.17'", - ); - - assert_marker_equal( - "python_version < '3.17' and python_version < '3.18'", - "python_version < '3.17'", - ); - - assert_marker_equal( - "python_version <= '3.18' and python_version == '3.18'", - "python_version == '3.18'", - ); - - assert_marker_equal( - "python_version <= '3.18' or python_version == '3.18'", - "python_version <= '3.18'", - ); - - assert_marker_equal( - "python_version <= '3.15' or (python_version <= '3.17' and python_version < '3.16')", - "python_version < '3.16'", - ); - - assert_marker_equal( - "(python_version > '3.17' or python_version > '3.16') and python_version > '3.15'", - "python_version > '3.16'", - ); - - assert_marker_equal( - "(python_version > '3.17' or python_version > '3.16') and python_version > '3.15' and implementation_version == '1'", - "implementation_version == '1' and python_version > '3.16'", - ); - - assert_marker_equal( - "('3.17' < python_version or '3.16' < python_version) and '3.15' < python_version and implementation_version == '1'", - "implementation_version == '1' and python_version > '3.16'", - ); - - assert_marker_equal("extra == 'a' or extra == 'a'", "extra == 'a'"); - assert_marker_equal( - "extra == 'a' and extra == 'a' or extra == 'b'", - "extra == 'a' or extra == 'b'", - ); - - // bogus expressions are retained but still normalized - assert_marker_equal( - "python_version < '3.17' and '3.18' == python_version", - "python_version == '3.18' and python_version < '3.17'", - ); - - // flatten nested expressions - assert_marker_equal( - "((extra == 'a' and extra == 'b') and extra == 'c') and extra == 'b'", - "extra == 'a' and extra == 'b' and extra == 'c'", - ); - - assert_marker_equal( - "((extra == 'a' or extra == 'b') or extra == 'c') or extra == 'b'", - "extra == 'a' or extra == 'b' or extra == 'c'", - ); - - // complex expressions - assert_marker_equal( - "extra == 'a' or (extra == 'a' and extra == 'b')", - "extra == 'a'", - ); - - assert_marker_equal( - "extra == 'a' and (extra == 'a' or extra == 'b')", - "extra == 'a'", - ); - - assert_marker_equal( - "(extra == 'a' and (extra == 'a' or extra == 'b')) or extra == 'd'", - "extra == 'a' or extra == 'd'", - ); - - assert_marker_equal( - "((extra == 'a' and extra == 'b') or extra == 'c') or extra == 'b'", - "extra == 'b' or extra == 'c'", - ); - - assert_marker_equal( - "((extra == 'a' or extra == 'b') and extra == 'c') and extra == 'b'", - "extra == 'b' and extra == 'c'", - ); - - assert_marker_equal( - "((extra == 'a' or extra == 'b') and extra == 'c') or extra == 'b'", - "extra == 'b' or (extra == 'a' and extra == 'c')", - ); - - // post-normalization filtering - assert_marker_equal( - "(python_version < '3.1' or python_version < '3.2') and (python_version < '3.2' or python_version == '3.3')", - "python_version < '3.2'", - ); - - // normalize out redundant ranges - assert_normalizes_out("python_version < '3.12.0rc1' or python_version >= '3.12.0rc1'"); - - assert_normalizes_out( - "extra == 'a' or (python_version < '3.12.0rc1' or python_version >= '3.12.0rc1')", - ); - - assert_normalizes_to( - "extra == 'a' and (python_version < '3.12.0rc1' or python_version >= '3.12.0rc1')", - "extra == 'a'", - ); - - // normalize `!=` operators - assert_normalizes_out("python_version != '3.10' or python_version < '3.12'"); - - assert_normalizes_to( - "python_version != '3.10' or python_version > '3.12'", - "python_version < '3.10' or python_version > '3.10'", - ); - - assert_normalizes_to( - "python_version != '3.8' and python_version < '3.10'", - "python_version < '3.10' and (python_version < '3.8' or python_version > '3.8')", - ); - - assert_normalizes_to( - "python_version != '3.8' and python_version != '3.9'", - "(python_version < '3.8' or python_version > '3.8') and (python_version < '3.9' or python_version > '3.9')", - ); - - // normalize out redundant expressions - assert_normalizes_out("sys_platform == 'win32' or sys_platform != 'win32'"); - - assert_normalizes_out("'win32' == sys_platform or sys_platform != 'win32'"); - - assert_normalizes_out( - "sys_platform == 'win32' or sys_platform == 'win32' or sys_platform != 'win32'", - ); - - assert_normalizes_to( - "sys_platform == 'win32' and sys_platform != 'win32'", - "sys_platform == 'win32' and sys_platform != 'win32'", - ); - } - - #[test] - fn requires_python() { - assert_normalizes_out("python_version >= '3.8'"); - assert_normalizes_out("python_version >= '3.8' or sys_platform == 'win32'"); - - assert_normalizes_to( - "python_version >= '3.8' and sys_platform == 'win32'", - "sys_platform == 'win32'", - ); - - assert_normalizes_to("python_version == '3.8'", "python_version == '3.8'"); - - assert_normalizes_to("python_version <= '3.10'", "python_version <= '3.10'"); - } - - #[test] - fn extra_disjointness() { - assert!(!is_disjoint("extra == 'a'", "python_version == '1'")); - - assert!(!is_disjoint("extra == 'a'", "extra == 'a'")); - assert!(!is_disjoint("extra == 'a'", "extra == 'b'")); - assert!(!is_disjoint("extra == 'b'", "extra == 'a'")); - assert!(!is_disjoint("extra == 'b'", "extra != 'a'")); - assert!(!is_disjoint("extra != 'b'", "extra == 'a'")); - assert!(is_disjoint("extra != 'b'", "extra == 'b'")); - assert!(is_disjoint("extra == 'b'", "extra != 'b'")); - } - - #[test] - fn arbitrary_disjointness() { - assert!(is_disjoint( - "python_version == 'Linux'", - "python_version == '3.7.1'" - )); - } - - #[test] - fn version_disjointness() { - assert!(!is_disjoint( - "os_name == 'Linux'", - "python_version == '3.7.1'" - )); - - test_version_bounds_disjointness("python_version"); - - assert!(!is_disjoint( - "python_version == '3.7.*'", - "python_version == '3.7.1'" - )); - } - - #[test] - fn string_disjointness() { - assert!(!is_disjoint( - "os_name == 'Linux'", - "platform_version == '3.7.1'" - )); - assert!(!is_disjoint( - "implementation_version == '3.7.0'", - "python_version == '3.7.1'" - )); - - // basic version bounds checking should still work with lexicographical comparisons - test_version_bounds_disjointness("platform_version"); - - assert!(is_disjoint("os_name == 'Linux'", "os_name == 'OSX'")); - assert!(is_disjoint("os_name <= 'Linux'", "os_name == 'OSX'")); - - assert!(!is_disjoint( - "os_name in 'OSXLinuxWindows'", - "os_name == 'OSX'" - )); - assert!(!is_disjoint("'OSX' in os_name", "'Linux' in os_name")); - - // complicated `in` intersections are not supported - assert!(!is_disjoint("os_name in 'OSX'", "os_name in 'Linux'")); - assert!(!is_disjoint( - "os_name in 'OSXLinux'", - "os_name == 'Windows'" - )); - - assert!(is_disjoint( - "os_name in 'Windows'", - "os_name not in 'Windows'" - )); - assert!(is_disjoint( - "'Windows' in os_name", - "'Windows' not in os_name" - )); - - assert!(!is_disjoint("'Windows' in os_name", "'Windows' in os_name")); - assert!(!is_disjoint("'Linux' in os_name", "os_name not in 'Linux'")); - assert!(!is_disjoint("'Linux' not in os_name", "os_name in 'Linux'")); - } - - #[test] - fn combined_disjointness() { - assert!(!is_disjoint( - "os_name == 'a' and platform_version == '1'", - "os_name == 'a'" - )); - assert!(!is_disjoint( - "os_name == 'a' or platform_version == '1'", - "os_name == 'a'" - )); - - assert!(is_disjoint( - "os_name == 'a' and platform_version == '1'", - "os_name == 'a' and platform_version == '2'" - )); - assert!(is_disjoint( - "os_name == 'a' and platform_version == '1'", - "'2' == platform_version and os_name == 'a'" - )); - assert!(!is_disjoint( - "os_name == 'a' or platform_version == '1'", - "os_name == 'a' or platform_version == '2'" - )); - - assert!(is_disjoint( - "sys_platform == 'darwin' and implementation_name == 'pypy'", - "sys_platform == 'bar' or implementation_name == 'foo'", - )); - assert!(is_disjoint( - "sys_platform == 'bar' or implementation_name == 'foo'", - "sys_platform == 'darwin' and implementation_name == 'pypy'", - )); - } - - #[test] - fn is_definitively_empty_set() { - assert!(is_empty("'wat' == 'wat'")); - assert!(is_empty( - "python_version < '3.10' and python_version >= '3.10'" - )); - assert!(is_empty( - "(python_version < '3.10' and python_version >= '3.10') \ - or (python_version < '3.9' and python_version >= '3.9')", - )); - - assert!(!is_empty("python_version < '3.10'")); - assert!(!is_empty("python_version < '0'")); - assert!(!is_empty( - "python_version < '3.10' and python_version >= '3.9'" - )); - assert!(!is_empty( - "python_version < '3.10' or python_version >= '3.11'" - )); - } - - fn test_version_bounds_disjointness(version: &str) { - assert!(!is_disjoint( - format!("{version} > '2.7.0'"), - format!("{version} == '3.6.0'") - )); - assert!(!is_disjoint( - format!("{version} >= '3.7.0'"), - format!("{version} == '3.7.1'") - )); - assert!(!is_disjoint( - format!("{version} >= '3.7.0'"), - format!("'3.7.1' == {version}") - )); - - assert!(is_disjoint( - format!("{version} >= '3.7.1'"), - format!("{version} == '3.7.0'") - )); - assert!(is_disjoint( - format!("'3.7.1' <= {version}"), - format!("{version} == '3.7.0'") - )); - - assert!(is_disjoint( - format!("{version} < '3.7.0'"), - format!("{version} == '3.7.0'") - )); - assert!(is_disjoint( - format!("'3.7.0' > {version}"), - format!("{version} == '3.7.0'") - )); - assert!(is_disjoint( - format!("{version} < '3.7.0'"), - format!("{version} == '3.7.1'") - )); - - assert!(is_disjoint( - format!("{version} == '3.7.0'"), - format!("{version} == '3.7.1'") - )); - assert!(is_disjoint( - format!("{version} == '3.7.0'"), - format!("{version} != '3.7.0'") - )); - } - - fn is_empty(tree: &str) -> bool { - let tree = MarkerTree::parse_reporter(tree, &mut TracingReporter).unwrap(); - super::is_definitively_empty_set(&tree) - } - - fn is_disjoint(one: impl AsRef, two: impl AsRef) -> bool { - let one = MarkerTree::parse_reporter(one.as_ref(), &mut TracingReporter).unwrap(); - let two = MarkerTree::parse_reporter(two.as_ref(), &mut TracingReporter).unwrap(); - super::is_disjoint(&one, &two) && super::is_disjoint(&two, &one) - } - - fn assert_marker_equal(one: impl AsRef, two: impl AsRef) { - let bound = RequiresPythonBound::new(Included(Version::new([3, 8]))); - let tree1 = MarkerTree::parse_reporter(one.as_ref(), &mut TracingReporter).unwrap(); - let tree1 = normalize(tree1, Some(&bound)).unwrap(); - let tree2 = MarkerTree::parse_reporter(two.as_ref(), &mut TracingReporter).unwrap(); - assert_eq!( - tree1.to_string(), - tree2.to_string(), - "failed to normalize {}", - one.as_ref() - ); - } - - fn assert_normalizes_to(before: impl AsRef, after: impl AsRef) { - let bound = RequiresPythonBound::new(Included(Version::new([3, 8]))); - let normalized = MarkerTree::parse_reporter(before.as_ref(), &mut TracingReporter) - .unwrap() - .clone(); - let normalized = normalize(normalized, Some(&bound)).unwrap(); - assert_eq!(normalized.to_string(), after.as_ref()); - } - - fn assert_normalizes_out(before: impl AsRef) { - let bound = RequiresPythonBound::new(Included(Version::new([3, 8]))); - let normalized = MarkerTree::parse_reporter(before.as_ref(), &mut TracingReporter) - .unwrap() - .clone(); - assert!(normalize(normalized, Some(&bound)).is_none()); - } + let mut markers = Vec::new(); + collect_python_markers(tree, &mut markers); + markers.into_iter().min() } diff --git a/crates/uv-resolver/src/pubgrub/mod.rs b/crates/uv-resolver/src/pubgrub/mod.rs index 587de74402ae..d6aa611f3d86 100644 --- a/crates/uv-resolver/src/pubgrub/mod.rs +++ b/crates/uv-resolver/src/pubgrub/mod.rs @@ -3,11 +3,10 @@ pub(crate) use crate::pubgrub::distribution::PubGrubDistribution; pub(crate) use crate::pubgrub::package::{PubGrubPackage, PubGrubPackageInner, PubGrubPython}; pub(crate) use crate::pubgrub::priority::{PubGrubPriorities, PubGrubPriority}; pub(crate) use crate::pubgrub::report::PubGrubReportFormatter; -pub use crate::pubgrub::specifier::{PubGrubSpecifier, PubGrubSpecifierError}; +pub use uv_pubgrub::{PubGrubSpecifier, PubGrubSpecifierError}; mod dependencies; mod distribution; mod package; mod priority; mod report; -mod specifier; diff --git a/crates/uv-resolver/src/pubgrub/package.rs b/crates/uv-resolver/src/pubgrub/package.rs index f8d74ef25fb0..994dd94864a1 100644 --- a/crates/uv-resolver/src/pubgrub/package.rs +++ b/crates/uv-resolver/src/pubgrub/package.rs @@ -1,7 +1,7 @@ use std::ops::Deref; use std::sync::Arc; -use pep508_rs::MarkerTree; +use pep508_rs::{MarkerTree, MarkerTreeContents}; use uv_normalize::{ExtraName, GroupName, PackageName}; /// [`Arc`] wrapper around [`PubGrubPackageInner`] to make cloning (inside PubGrub) cheap. @@ -49,7 +49,7 @@ pub enum PubGrubPackageInner { name: PackageName, extra: Option, dev: Option, - marker: Option, + marker: Option, }, /// A proxy package to represent a dependency with an extra (e.g., `black[colorama]`). /// @@ -67,7 +67,7 @@ pub enum PubGrubPackageInner { Extra { name: PackageName, extra: ExtraName, - marker: Option, + marker: Option, }, /// A proxy package to represent an enabled "dependency group" (e.g., development dependencies). /// @@ -77,7 +77,7 @@ pub enum PubGrubPackageInner { Dev { name: PackageName, dev: GroupName, - marker: Option, + marker: Option, }, /// A proxy package for a base package with a marker (e.g., `black; python_version >= "3.6"`). /// @@ -85,7 +85,7 @@ pub enum PubGrubPackageInner { /// rather than the `Marker` variant. Marker { name: PackageName, - marker: MarkerTree, + marker: MarkerTreeContents, }, } @@ -94,14 +94,16 @@ impl PubGrubPackage { pub(crate) fn from_package( name: PackageName, extra: Option, - mut marker: Option, + marker: Option, ) -> Self { // Remove all extra expressions from the marker, since we track extras // separately. This also avoids an issue where packages added via // extras end up having two distinct marker expressions, which in turn // makes them two distinct packages. This results in PubGrub being // unable to unify version constraints across such packages. - marker = marker.and_then(|m| m.simplify_extras_with(|_| true)); + let marker = marker + .map(|m| m.simplify_extras_with(|_| true)) + .and_then(|marker| marker.contents()); if let Some(extra) = extra { Self(Arc::new(PubGrubPackageInner::Extra { name, @@ -155,8 +157,10 @@ impl PubGrubPackage { PubGrubPackageInner::Root(_) | PubGrubPackageInner::Python(_) => None, PubGrubPackageInner::Package { marker, .. } | PubGrubPackageInner::Extra { marker, .. } - | PubGrubPackageInner::Dev { marker, .. } => marker.as_ref(), - PubGrubPackageInner::Marker { marker, .. } => Some(marker), + | PubGrubPackageInner::Dev { marker, .. } => { + marker.as_ref().map(MarkerTreeContents::as_ref) + } + PubGrubPackageInner::Marker { marker, .. } => Some(marker.as_ref()), } } diff --git a/crates/uv-resolver/src/requires_python.rs b/crates/uv-resolver/src/requires_python.rs index 3002a0ea5b9f..7efb14e75100 100644 --- a/crates/uv-resolver/src/requires_python.rs +++ b/crates/uv-resolver/src/requires_python.rs @@ -214,7 +214,7 @@ impl RequiresPython { // tree we would generate would always evaluate to // `true` because every possible Python version would // satisfy it. - Bound::Unbounded => return MarkerTree::And(vec![]), + Bound::Unbounded => return MarkerTree::TRUE, Bound::Excluded(version) => (Operator::GreaterThan, version.clone().without_local()), Bound::Included(version) => { (Operator::GreaterThanEqual, version.clone().without_local()) @@ -248,10 +248,11 @@ impl RequiresPython { // impossible here). specifier: VersionSpecifier::from_version(op, version).unwrap(), }; - MarkerTree::And(vec![ - MarkerTree::Expression(expr_python_version), - MarkerTree::Expression(expr_python_full_version), - ]) + + let mut conjunction = MarkerTree::TRUE; + conjunction.and(MarkerTree::expression(expr_python_version)); + conjunction.and(MarkerTree::expression(expr_python_full_version)); + conjunction } /// Returns `false` if the wheel's tags state it can't be used in the given Python version diff --git a/crates/uv-resolver/src/resolution/display.rs b/crates/uv-resolver/src/resolution/display.rs index 91e8ad9392de..656fb5babf38 100644 --- a/crates/uv-resolver/src/resolution/display.rs +++ b/crates/uv-resolver/src/resolution/display.rs @@ -11,7 +11,7 @@ use pep508_rs::MarkerTree; use uv_normalize::PackageName; use crate::resolution::{RequirementsTxtDist, ResolutionGraphNode}; -use crate::{marker, ResolutionGraph, ResolverMarkers}; +use crate::{ResolutionGraph, ResolverMarkers}; static UNIVERSAL_MARKERS: ResolverMarkers = ResolverMarkers::Universal { fork_preferences: None, @@ -410,7 +410,7 @@ fn propagate_markers(mut graph: IntermediatePetGraph) -> IntermediatePetGraph { } if let DisplayResolutionGraphNode::Dist(node) = &mut graph[index] { - node.markers = marker_tree.and_then(|marker| marker::normalize(marker, None)); + node.markers = marker_tree; }; } diff --git a/crates/uv-resolver/src/resolution/graph.rs b/crates/uv-resolver/src/resolution/graph.rs index 254c6331dfba..be736c8a6c98 100644 --- a/crates/uv-resolver/src/resolution/graph.rs +++ b/crates/uv-resolver/src/resolution/graph.rs @@ -5,6 +5,7 @@ use petgraph::{ graph::{Graph, NodeIndex}, Directed, }; +use pubgrub::Range; use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet}; use distribution_types::{ @@ -12,7 +13,7 @@ use distribution_types::{ VersionOrUrlRef, }; use pep440_rs::{Version, VersionSpecifier}; -use pep508_rs::{MarkerEnvironment, MarkerTree, VerbatimUrl}; +use pep508_rs::{MarkerEnvironment, MarkerTree, MarkerTreeKind, VerbatimUrl}; use pypi_types::{HashDigest, ParsedUrlError, Requirement, VerbatimParsedUrl, Yanked}; use uv_configuration::{Constraints, Overrides}; use uv_distribution::Metadata; @@ -158,12 +159,14 @@ impl ResolutionGraph { .cloned(); // Normalize any markers. - for edge in petgraph.edge_indices() { - if let Some(marker) = petgraph[edge].take() { - petgraph[edge] = crate::marker::normalize( - marker, - requires_python.as_ref().map(RequiresPython::bound), - ); + if let Some(ref requires_python) = requires_python { + for edge in petgraph.edge_indices() { + if let Some(marker) = petgraph[edge].take() { + petgraph[edge] = Some(marker.simplify_python_versions( + Range::from(requires_python.bound_major_minor().clone()), + Range::from(requires_python.bound().clone()), + )); + } } } @@ -185,6 +188,8 @@ impl ResolutionGraph { .expect("A non-forking resolution exists in forking mode") .clone() }) + // Any unsatisfiable forks were skipped. + .filter(|fork| !fork.is_false()) .collect(), ) }; @@ -511,16 +516,31 @@ impl ResolutionGraph { /// Add all marker parameters from the given tree to the given set. fn add_marker_params_from_tree(marker_tree: &MarkerTree, set: &mut IndexSet) { - match marker_tree { - MarkerTree::Expression(MarkerExpression::Version { key, .. }) => { - set.insert(MarkerParam::Version(key.clone())); + match marker_tree.kind() { + MarkerTreeKind::True => {} + MarkerTreeKind::False => {} + MarkerTreeKind::Version(marker) => { + set.insert(MarkerParam::Version(marker.key().clone())); + for (_, tree) in marker.edges() { + add_marker_params_from_tree(&tree, set); + } } - MarkerTree::Expression(MarkerExpression::String { key, .. }) => { - set.insert(MarkerParam::String(key.clone())); + MarkerTreeKind::String(marker) => { + set.insert(MarkerParam::String(marker.key().clone())); + for (_, tree) in marker.children() { + add_marker_params_from_tree(&tree, set); + } } - MarkerTree::And(ref exprs) | MarkerTree::Or(ref exprs) => { - for expr in exprs { - add_marker_params_from_tree(expr, set); + MarkerTreeKind::In(marker) => { + set.insert(MarkerParam::String(marker.key().clone())); + for (_, tree) in marker.children() { + add_marker_params_from_tree(&tree, set); + } + } + MarkerTreeKind::Contains(marker) => { + set.insert(MarkerParam::String(marker.key().clone())); + for (_, tree) in marker.children() { + add_marker_params_from_tree(&tree, set); } } // We specifically don't care about these for the @@ -528,9 +548,11 @@ impl ResolutionGraph { // file. Quoted strings are marker values given by the // user. We don't track those here, since we're only // interested in which markers are used. - MarkerTree::Expression( - MarkerExpression::Extra { .. } | MarkerExpression::Arbitrary { .. }, - ) => {} + MarkerTreeKind::Extra(marker) => { + for (_, tree) in marker.children() { + add_marker_params_from_tree(&tree, set); + } + } } } @@ -579,7 +601,7 @@ impl ResolutionGraph { // Generate the final marker expression as a conjunction of // strict equality terms. - let mut conjuncts = vec![]; + let mut conjunction = MarkerTree::TRUE; for marker_param in seen_marker_values { let expr = match marker_param { MarkerParam::Version(value_version) => { @@ -598,9 +620,9 @@ impl ResolutionGraph { } } }; - conjuncts.push(MarkerTree::Expression(expr)); + conjunction.and(MarkerTree::expression(expr)); } - Ok(MarkerTree::And(conjuncts)) + Ok(conjunction) } /// If there are multiple distributions for the same package name, return the markers of the diff --git a/crates/uv-resolver/src/resolution/requirements_txt.rs b/crates/uv-resolver/src/resolution/requirements_txt.rs index 35049bbd0e8d..b58dd594b943 100644 --- a/crates/uv-resolver/src/resolution/requirements_txt.rs +++ b/crates/uv-resolver/src/resolution/requirements_txt.rs @@ -89,7 +89,11 @@ impl RequirementsTxtDist { } }; if let Some(given) = given { - return if let Some(markers) = self.markers.as_ref().filter(|_| include_markers) + return if let Some(markers) = self + .markers + .as_ref() + .filter(|_| include_markers) + .and_then(MarkerTree::contents) { Cow::Owned(format!("{given} ; {markers}")) } else { @@ -100,7 +104,12 @@ impl RequirementsTxtDist { } if self.extras.is_empty() || !include_extras { - if let Some(markers) = self.markers.as_ref().filter(|_| include_markers) { + if let Some(markers) = self + .markers + .as_ref() + .filter(|_| include_markers) + .and_then(MarkerTree::contents) + { Cow::Owned(format!("{} ; {}", self.dist.verbatim(), markers)) } else { self.dist.verbatim() @@ -109,7 +118,12 @@ impl RequirementsTxtDist { let mut extras = self.extras.clone(); extras.sort_unstable(); extras.dedup(); - if let Some(markers) = self.markers.as_ref().filter(|_| include_markers) { + if let Some(markers) = self + .markers + .as_ref() + .filter(|_| include_markers) + .and_then(MarkerTree::contents) + { Cow::Owned(format!( "{}[{}]{} ; {}", self.name(), diff --git a/crates/uv-resolver/src/resolver/fork_map.rs b/crates/uv-resolver/src/resolver/fork_map.rs index 16c49f7bb581..236ce148e62c 100644 --- a/crates/uv-resolver/src/resolver/fork_map.rs +++ b/crates/uv-resolver/src/resolver/fork_map.rs @@ -2,7 +2,6 @@ use pep508_rs::{MarkerTree, PackageName}; use pypi_types::Requirement; use rustc_hash::FxHashMap; -use crate::marker::is_disjoint; use crate::ResolverMarkers; /// A set of package names associated with a given fork. @@ -72,7 +71,7 @@ impl ForkMap { !entry .marker .as_ref() - .is_some_and(|marker| is_disjoint(fork, marker)) + .is_some_and(|marker| fork.is_disjoint(marker)) }) .map(|entry| &entry.value) .collect(), diff --git a/crates/uv-resolver/src/resolver/mod.rs b/crates/uv-resolver/src/resolver/mod.rs index e2048ae91dee..1850988cf505 100644 --- a/crates/uv-resolver/src/resolver/mod.rs +++ b/crates/uv-resolver/src/resolver/mod.rs @@ -45,7 +45,6 @@ use crate::dependency_provider::UvDependencyProvider; use crate::error::{NoSolutionError, ResolveError}; use crate::fork_urls::ForkUrls; use crate::manifest::Manifest; -use crate::marker::{normalize, requires_python_marker}; use crate::pins::FilePins; use crate::preferences::Preferences; use crate::pubgrub::{ @@ -69,7 +68,7 @@ pub use crate::resolver::provider::{ use crate::resolver::reporter::Facade; pub use crate::resolver::reporter::{BuildId, Reporter}; use crate::yanks::AllowedYanks; -use crate::{DependencyMode, Exclusions, FlatIndex, Options}; +use crate::{marker, DependencyMode, Exclusions, FlatIndex, Options}; mod availability; mod batch_prefetch; @@ -342,11 +341,11 @@ impl ResolverState ResolverState ResolverState ResolverState ResolverState ResolverState ResolverState ResolverState ResolverState Self { let combined_markers = self.markers.and(markers); - let combined_markers = - normalize(combined_markers, None).unwrap_or_else(|| MarkerTree::And(vec![])); // If the fork contains a narrowed Python requirement, apply it. - let python_requirement = requires_python_marker(&combined_markers) + let python_requirement = marker::requires_python(&combined_markers) .and_then(|marker| self.python_requirement.narrow(&marker)); if let Some(python_requirement) = python_requirement { if let Some(target) = python_requirement.target() { @@ -2363,7 +2364,7 @@ impl ForkState { to_url: to_url.cloned(), to_extra: None, to_dev: None, - marker: Some(dependency_marker.clone()), + marker: Some(dependency_marker.as_ref().clone()), }; edges.insert(edge); } @@ -2389,7 +2390,7 @@ impl ForkState { to_url: to_url.cloned(), to_extra: Some(dependency_extra.clone()), to_dev: None, - marker: dependency_marker.clone(), + marker: dependency_marker.clone().map(MarkerTree::from), }; edges.insert(edge); } @@ -2415,7 +2416,7 @@ impl ForkState { to_url: to_url.cloned(), to_extra: None, to_dev: Some(dependency_dev.clone()), - marker: dependency_marker.clone(), + marker: dependency_marker.clone().map(MarkerTree::from), }; edges.insert(edge); } @@ -2726,7 +2727,7 @@ impl Dependencies { } let mut forks = vec![Fork { dependencies: vec![], - markers: MarkerTree::And(vec![]), + markers: MarkerTree::TRUE, }]; let mut diverging_packages = Vec::new(); for (name, possible_forks) in by_name { @@ -2748,7 +2749,7 @@ impl Dependencies { assert!(fork_groups.forks.len() >= 2, "expected definitive fork"); let mut new_forks: Vec = vec![]; if let Some(markers) = fork_groups.remaining_universe() { - trace!("Adding split to cover possibly incomplete markers: {markers}"); + trace!("Adding split to cover possibly incomplete markers: {markers:?}"); let mut new_forks_for_remaining_universe = forks.clone(); for fork in &mut new_forks_for_remaining_universe { fork.markers.and(markers.clone()); @@ -2847,8 +2848,8 @@ impl Ord for Fork { // A higher `requires-python` requirement indicates a _lower-priority_ fork. We'd prefer // to solve `<3.7` before solving `>=3.7`, since the resolution produced by the former might // work for the latter, but the inverse is unlikely to be true. - let self_bound = requires_python_marker(&self.markers).unwrap_or_default(); - let other_bound = requires_python_marker(&other.markers).unwrap_or_default(); + let self_bound = marker::requires_python(&self.markers).unwrap_or_default(); + let other_bound = marker::requires_python(&other.markers).unwrap_or_default(); other_bound.cmp(&self_bound).then_with(|| { // If there's no difference, prioritize forks with upper bounds. We'd prefer to solve @@ -2894,12 +2895,10 @@ impl Fork { /// It is only added if the markers on the given package are not disjoint /// with this fork's markers. fn add_nonfork_package(&mut self, dependency: PubGrubDependency) { - use crate::marker::is_disjoint; - if dependency .package .marker() - .map_or(true, |marker| !is_disjoint(marker, &self.markers)) + .map_or(true, |marker| !marker.is_disjoint(&self.markers)) { self.dependencies.push(dependency); } @@ -2908,13 +2907,11 @@ impl Fork { /// Removes any dependencies in this fork whose markers are disjoint with /// its own markers. fn remove_disjoint_packages(&mut self) { - use crate::marker::is_disjoint; - self.dependencies.retain(|dependency| { dependency .package .marker() - .map_or(true, |pkg_marker| !is_disjoint(pkg_marker, &self.markers)) + .map_or(true, |pkg_marker| !pkg_marker.is_disjoint(&self.markers)) }); } } @@ -3041,9 +3038,13 @@ impl<'a> PossibleForkGroups<'a> { /// `None` is returned. But note that if a marker tree is returned, it is /// still possible for it to describe exactly zero marker environments. fn remaining_universe(&self) -> Option { - let have = MarkerTree::Or(self.forks.iter().map(PossibleFork::union).collect()); + let mut have = MarkerTree::FALSE; + for fork in &self.forks { + have.or(fork.union()); + } + let missing = have.negate(); - if crate::marker::is_definitively_empty_set(&missing) { + if missing.is_false() { return None; } Some(missing) @@ -3074,10 +3075,8 @@ impl<'a> PossibleFork<'a> { /// intersection with *any* of the package markers within this possible /// fork. fn is_overlapping(&self, candidate_package_markers: &MarkerTree) -> bool { - use crate::marker::is_disjoint; - for (_, package_markers) in &self.packages { - if !is_disjoint(candidate_package_markers, package_markers) { + if !candidate_package_markers.is_disjoint(package_markers) { return true; } } @@ -3089,16 +3088,11 @@ impl<'a> PossibleFork<'a> { /// Each marker expression in the union returned is guaranteed to be overlapping /// with at least one other expression in the same union. fn union(&self) -> MarkerTree { - let mut trees: Vec = self - .packages - .iter() - .map(|&(_, tree)| (*tree).clone()) - .collect(); - if trees.len() == 1 { - trees.pop().unwrap() - } else { - MarkerTree::Or(trees) + let mut union = MarkerTree::FALSE; + for &(_, marker) in &self.packages { + union.or(marker.clone()); } + union } } @@ -3124,5 +3118,5 @@ fn possible_to_satisfy_markers(markers: &MarkerTree, requirement: &Requirement) let Some(marker) = requirement.marker.as_ref() else { return true; }; - !crate::marker::is_disjoint(markers, marker) + !markers.is_disjoint(marker) } diff --git a/crates/uv-resolver/src/resolver/resolver_markers.rs b/crates/uv-resolver/src/resolver/resolver_markers.rs index 9aab2f04aa1b..8162c8442505 100644 --- a/crates/uv-resolver/src/resolver/resolver_markers.rs +++ b/crates/uv-resolver/src/resolver/resolver_markers.rs @@ -66,7 +66,7 @@ impl Display for ResolverMarkers { ResolverMarkers::Universal { .. } => f.write_str("universal"), ResolverMarkers::SpecificEnvironment(_) => f.write_str("specific environment"), ResolverMarkers::Fork(markers) => { - write!(f, "({markers})") + write!(f, "({markers:?})") } } } diff --git a/crates/uv/src/commands/pip/compile.rs b/crates/uv/src/commands/pip/compile.rs index ba69b41a8a4f..081214ddb729 100644 --- a/crates/uv/src/commands/pip/compile.rs +++ b/crates/uv/src/commands/pip/compile.rs @@ -407,12 +407,14 @@ pub(crate) async fn pip_compile( if include_marker_expression { if let ResolverMarkers::SpecificEnvironment(markers) = &markers { let relevant_markers = resolution.marker_tree(&top_level_index, markers)?; - writeln!( - writer, - "{}", - "# Pinned dependencies known to be valid for:".green() - )?; - writeln!(writer, "{}", format!("# {relevant_markers}").green())?; + if let Some(relevant_markers) = relevant_markers.contents() { + writeln!( + writer, + "{}", + "# Pinned dependencies known to be valid for:".green() + )?; + writeln!(writer, "{}", format!("# {relevant_markers}").green())?; + } } } diff --git a/crates/uv/tests/lock.rs b/crates/uv/tests/lock.rs index a3d1bab0535e..8247b2f408db 100644 --- a/crates/uv/tests/lock.rs +++ b/crates/uv/tests/lock.rs @@ -2129,8 +2129,8 @@ fn lock_upgrade_log_multi_version() -> Result<()> { version = 1 requires-python = ">=3.12" environment-markers = [ - "sys_platform == 'win32'", "sys_platform != 'win32'", + "sys_platform == 'win32'", ] [options] @@ -3586,14 +3586,12 @@ fn lock_python_version_marker_complement() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "python_full_version <= '3.10' and python_version == '3.10'", - "python_full_version <= '3.10' and python_version < '3.10'", - "python_full_version <= '3.10' and python_version < '3.10' and python_version > '3.10'", - "python_full_version <= '3.10' and python_version > '3.10'", + "python_full_version > '3.10' and python_version > '3.10'", "python_full_version > '3.10' and python_version == '3.10'", "python_full_version > '3.10' and python_version < '3.10'", - "python_full_version > '3.10' and python_version < '3.10' and python_version > '3.10'", - "python_full_version > '3.10' and python_version > '3.10'", + "python_full_version <= '3.10' and python_version > '3.10'", + "python_full_version <= '3.10' and python_version == '3.10'", + "python_full_version <= '3.10' and python_version < '3.10'", ] [options] @@ -3624,7 +3622,7 @@ fn lock_python_version_marker_complement() -> Result<()> { dependencies = [ { name = "attrs" }, { name = "iniconfig" }, - { name = "typing-extensions", marker = "python_full_version <= '3.10' or python_full_version > '3.10'" }, + { name = "typing-extensions" }, ] [[package]] @@ -4791,8 +4789,8 @@ fn lock_same_version_multiple_urls() -> Result<()> { version = 1 requires-python = ">=3.12" environment-markers = [ - "sys_platform == 'darwin'", "sys_platform != 'darwin'", + "sys_platform == 'darwin'", ] [options] diff --git a/crates/uv/tests/lock_scenarios.rs b/crates/uv/tests/lock_scenarios.rs index 7141a4abfdc3..7a3e9486823b 100644 --- a/crates/uv/tests/lock_scenarios.rs +++ b/crates/uv/tests/lock_scenarios.rs @@ -87,8 +87,8 @@ fn fork_allows_non_conflicting_non_overlapping_dependencies() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "sys_platform == 'darwin'", "sys_platform == 'linux'", + "sys_platform == 'darwin'", "sys_platform != 'darwin' and sys_platform != 'linux'", ] @@ -295,8 +295,8 @@ fn fork_basic() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "sys_platform == 'darwin'", "sys_platform == 'linux'", + "sys_platform == 'darwin'", "sys_platform != 'darwin' and sys_platform != 'linux'", ] @@ -580,8 +580,8 @@ fn fork_filter_sibling_dependencies() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "sys_platform == 'darwin'", "sys_platform == 'linux'", + "sys_platform == 'darwin'", "sys_platform != 'darwin' and sys_platform != 'linux'", ] @@ -873,7 +873,7 @@ fn fork_incomplete_markers() -> Result<()> { environment-markers = [ "python_version < '3.10'", "python_version >= '3.11'", - "python_version < '3.11' and python_version >= '3.10'", + "python_version >= '3.10' and python_version < '3.11'", ] [[package]] @@ -928,7 +928,7 @@ fn fork_incomplete_markers() -> Result<()> { dependencies = [ { name = "package-a", version = "1.0.0", source = { registry = "https://astral-sh.github.io/packse/PACKSE_VERSION/simple-html/" }, marker = "python_version < '3.10'" }, { name = "package-a", version = "2.0.0", source = { registry = "https://astral-sh.github.io/packse/PACKSE_VERSION/simple-html/" }, marker = "python_version >= '3.11'" }, - { name = "package-b", marker = "python_version < '3.10' or python_version >= '3.11' or (python_version < '3.11' and python_version >= '3.10')" }, + { name = "package-b" }, ] "### ); @@ -1221,10 +1221,10 @@ fn fork_marker_inherit_combined_allowed() -> Result<()> { requires-python = ">=3.8" environment-markers = [ "sys_platform == 'linux'", - "implementation_name == 'cpython' and sys_platform == 'darwin'", + "sys_platform != 'darwin' and sys_platform != 'linux'", "implementation_name == 'pypy' and sys_platform == 'darwin'", + "implementation_name == 'cpython' and sys_platform == 'darwin'", "implementation_name != 'cpython' and implementation_name != 'pypy' and sys_platform == 'darwin'", - "sys_platform != 'darwin' and sys_platform != 'linux'", ] [[package]] @@ -1232,8 +1232,8 @@ fn fork_marker_inherit_combined_allowed() -> Result<()> { version = "1.0.0" source = { registry = "https://astral-sh.github.io/packse/PACKSE_VERSION/simple-html/" } environment-markers = [ - "implementation_name == 'cpython' and sys_platform == 'darwin'", "implementation_name == 'pypy' and sys_platform == 'darwin'", + "implementation_name == 'cpython' and sys_platform == 'darwin'", "implementation_name != 'cpython' and implementation_name != 'pypy' and sys_platform == 'darwin'", ] dependencies = [ @@ -1265,7 +1265,7 @@ fn fork_marker_inherit_combined_allowed() -> Result<()> { "implementation_name == 'pypy' and sys_platform == 'darwin'", ] dependencies = [ - { name = "package-c", marker = "implementation_name == 'pypy' or sys_platform == 'linux'" }, + { name = "package-c", marker = "sys_platform == 'linux' or implementation_name == 'pypy'" }, ] sdist = { url = "https://astral-sh.github.io/packse/PACKSE_VERSION/files/fork_marker_inherit_combined_allowed_b-1.0.0.tar.gz", hash = "sha256:d6bd196a0a152c1b32e09f08e554d22ae6a6b3b916e39ad4552572afae5f5492" } wheels = [ @@ -1397,10 +1397,10 @@ fn fork_marker_inherit_combined_disallowed() -> Result<()> { requires-python = ">=3.8" environment-markers = [ "sys_platform == 'linux'", - "implementation_name == 'cpython' and sys_platform == 'darwin'", + "sys_platform != 'darwin' and sys_platform != 'linux'", "implementation_name == 'pypy' and sys_platform == 'darwin'", + "implementation_name == 'cpython' and sys_platform == 'darwin'", "implementation_name != 'cpython' and implementation_name != 'pypy' and sys_platform == 'darwin'", - "sys_platform != 'darwin' and sys_platform != 'linux'", ] [[package]] @@ -1408,8 +1408,8 @@ fn fork_marker_inherit_combined_disallowed() -> Result<()> { version = "1.0.0" source = { registry = "https://astral-sh.github.io/packse/PACKSE_VERSION/simple-html/" } environment-markers = [ - "implementation_name == 'cpython' and sys_platform == 'darwin'", "implementation_name == 'pypy' and sys_platform == 'darwin'", + "implementation_name == 'cpython' and sys_platform == 'darwin'", "implementation_name != 'cpython' and implementation_name != 'pypy' and sys_platform == 'darwin'", ] dependencies = [ @@ -1562,10 +1562,10 @@ fn fork_marker_inherit_combined() -> Result<()> { requires-python = ">=3.8" environment-markers = [ "sys_platform == 'linux'", - "implementation_name == 'cpython' and sys_platform == 'darwin'", + "sys_platform != 'darwin' and sys_platform != 'linux'", "implementation_name == 'pypy' and sys_platform == 'darwin'", + "implementation_name == 'cpython' and sys_platform == 'darwin'", "implementation_name != 'cpython' and implementation_name != 'pypy' and sys_platform == 'darwin'", - "sys_platform != 'darwin' and sys_platform != 'linux'", ] [[package]] @@ -1573,8 +1573,8 @@ fn fork_marker_inherit_combined() -> Result<()> { version = "1.0.0" source = { registry = "https://astral-sh.github.io/packse/PACKSE_VERSION/simple-html/" } environment-markers = [ - "implementation_name == 'cpython' and sys_platform == 'darwin'", "implementation_name == 'pypy' and sys_platform == 'darwin'", + "implementation_name == 'cpython' and sys_platform == 'darwin'", "implementation_name != 'cpython' and implementation_name != 'pypy' and sys_platform == 'darwin'", ] dependencies = [ @@ -1719,8 +1719,8 @@ fn fork_marker_inherit_isolated() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "sys_platform == 'darwin'", "sys_platform == 'linux'", + "sys_platform == 'darwin'", "sys_platform != 'darwin' and sys_platform != 'linux'", ] @@ -1863,8 +1863,8 @@ fn fork_marker_inherit_transitive() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "sys_platform == 'darwin'", "sys_platform == 'linux'", + "sys_platform == 'darwin'", "sys_platform != 'darwin' and sys_platform != 'linux'", ] @@ -2015,8 +2015,8 @@ fn fork_marker_inherit() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "sys_platform == 'darwin'", "sys_platform == 'linux'", + "sys_platform == 'darwin'", "sys_platform != 'darwin' and sys_platform != 'linux'", ] @@ -2149,8 +2149,8 @@ fn fork_marker_limited_inherit() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "sys_platform == 'darwin'", "sys_platform == 'linux'", + "sys_platform == 'darwin'", "sys_platform != 'darwin' and sys_platform != 'linux'", ] @@ -2206,7 +2206,7 @@ fn fork_marker_limited_inherit() -> Result<()> { dependencies = [ { name = "package-a", version = "1.0.0", source = { registry = "https://astral-sh.github.io/packse/PACKSE_VERSION/simple-html/" }, marker = "sys_platform == 'darwin'" }, { name = "package-a", version = "2.0.0", source = { registry = "https://astral-sh.github.io/packse/PACKSE_VERSION/simple-html/" }, marker = "sys_platform == 'linux'" }, - { name = "package-b", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "package-b" }, ] "### ); @@ -2299,8 +2299,8 @@ fn fork_marker_selection() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "sys_platform == 'darwin'", "sys_platform == 'linux'", + "sys_platform == 'darwin'", "sys_platform != 'darwin' and sys_platform != 'linux'", ] @@ -2342,7 +2342,7 @@ fn fork_marker_selection() -> Result<()> { version = "0.1.0" source = { editable = "." } dependencies = [ - { name = "package-a", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "package-a" }, { name = "package-b", version = "1.0.0", source = { registry = "https://astral-sh.github.io/packse/PACKSE_VERSION/simple-html/" }, marker = "sys_platform == 'darwin'" }, { name = "package-b", version = "2.0.0", source = { registry = "https://astral-sh.github.io/packse/PACKSE_VERSION/simple-html/" }, marker = "sys_platform == 'linux'" }, ] @@ -2449,8 +2449,8 @@ fn fork_marker_track() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "sys_platform == 'darwin'", "sys_platform == 'linux'", + "sys_platform == 'darwin'", "sys_platform != 'darwin' and sys_platform != 'linux'", ] @@ -2504,7 +2504,7 @@ fn fork_marker_track() -> Result<()> { version = "0.1.0" source = { editable = "." } dependencies = [ - { name = "package-a", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "package-a" }, { name = "package-b", version = "2.7", source = { registry = "https://astral-sh.github.io/packse/PACKSE_VERSION/simple-html/" }, marker = "sys_platform == 'darwin'" }, { name = "package-b", version = "2.8", source = { registry = "https://astral-sh.github.io/packse/PACKSE_VERSION/simple-html/" }, marker = "sys_platform == 'linux'" }, ] @@ -3712,8 +3712,8 @@ fn preferences_dependent_forking() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "sys_platform == 'linux'", "sys_platform != 'linux'", + "sys_platform == 'linux'", ] [[package]] diff --git a/crates/uv/tests/pip_compile.rs b/crates/uv/tests/pip_compile.rs index 3df9e076edfe..53ae853a5645 100644 --- a/crates/uv/tests/pip_compile.rs +++ b/crates/uv/tests/pip_compile.rs @@ -6471,23 +6471,23 @@ fn no_strip_markers_multiple_markers() -> Result<()> { ----- stdout ----- # This file was autogenerated by uv via the following command: # uv pip compile --cache-dir [CACHE_DIR] requirements.in --no-strip-markers --python-platform windows - attrs==23.2.0 ; python_version > '3.11' or sys_platform == 'win32' + attrs==23.2.0 ; sys_platform == 'win32' or python_version > '3.11' # via # outcome # trio - cffi==1.16.0 ; implementation_name != 'pypy' and os_name == 'nt' and (python_version > '3.11' or sys_platform == 'win32') + cffi==1.16.0 ; (implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'win32') or (python_version > '3.11' and implementation_name != 'pypy' and os_name == 'nt') # via trio - idna==3.6 ; python_version > '3.11' or sys_platform == 'win32' + idna==3.6 ; sys_platform == 'win32' or python_version > '3.11' # via trio - outcome==1.3.0.post0 ; python_version > '3.11' or sys_platform == 'win32' + outcome==1.3.0.post0 ; sys_platform == 'win32' or python_version > '3.11' # via trio - pycparser==2.21 ; implementation_name != 'pypy' and os_name == 'nt' and (python_version > '3.11' or sys_platform == 'win32') + pycparser==2.21 ; (implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'win32') or (python_version > '3.11' and implementation_name != 'pypy' and os_name == 'nt') # via cffi - sniffio==1.3.1 ; python_version > '3.11' or sys_platform == 'win32' + sniffio==1.3.1 ; sys_platform == 'win32' or python_version > '3.11' # via trio - sortedcontainers==2.4.0 ; python_version > '3.11' or sys_platform == 'win32' + sortedcontainers==2.4.0 ; sys_platform == 'win32' or python_version > '3.11' # via trio - trio==0.25.0 ; python_version > '3.11' or sys_platform == 'win32' + trio==0.25.0 ; sys_platform == 'win32' or python_version > '3.11' # via -r requirements.in ----- stderr ----- @@ -6618,7 +6618,7 @@ fn universal_conflicting() -> Result<()> { # via trio outcome==1.3.0.post0 ; sys_platform == 'darwin' or sys_platform == 'win32' # via trio - pycparser==2.21 ; (sys_platform == 'darwin' or sys_platform == 'win32') and ((implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32')) + pycparser==2.21 ; (implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32') # via cffi sniffio==1.3.1 ; sys_platform == 'darwin' or sys_platform == 'win32' # via trio @@ -7209,27 +7209,27 @@ fn universal_disjoint_base_or_local_requirement() -> Result<()> { ----- stdout ----- # This file was autogenerated by uv via the following command: # uv pip compile --cache-dir [CACHE_DIR] requirements.in --universal - cmake==3.28.4 ; python_version <= '3.12' and python_version >= '3.11' and platform_machine == 'x86_64' and platform_system == 'Linux' + cmake==3.28.4 ; python_version >= '3.11' and python_version <= '3.12' and platform_machine == 'x86_64' and platform_system == 'Linux' # via triton - . ; python_version < '3.11' or python_version > '3.12' or (python_version <= '3.12' and python_version >= '3.11') + . # via -r requirements.in - filelock==3.13.1 ; python_version < '3.11' or python_version > '3.12' or (python_version <= '3.12' and python_version >= '3.11') or (python_version <= '3.12' and python_version >= '3.11' and platform_machine == 'x86_64' and platform_system == 'Linux') + filelock==3.13.1 # via # torch # triton - jinja2==3.1.3 ; python_version < '3.11' or python_version > '3.12' or (python_version <= '3.12' and python_version >= '3.11') + jinja2==3.1.3 # via torch - lit==18.1.2 ; python_version <= '3.12' and python_version >= '3.11' and platform_machine == 'x86_64' and platform_system == 'Linux' + lit==18.1.2 ; python_version >= '3.11' and python_version <= '3.12' and platform_machine == 'x86_64' and platform_system == 'Linux' # via triton - markupsafe==2.1.5 ; (python_version < '3.11' or python_version > '3.12' or (python_version <= '3.12' and python_version >= '3.11')) and (python_version < '3.11' or python_version > '3.12' or (python_version <= '3.12' and python_version >= '3.11')) + markupsafe==2.1.5 # via jinja2 - mpmath==1.3.0 ; (python_version < '3.11' or python_version > '3.12' or (python_version <= '3.12' and python_version >= '3.11')) and (python_version < '3.11' or python_version > '3.12' or (python_version <= '3.12' and python_version >= '3.11')) + mpmath==1.3.0 # via sympy - networkx==3.2.1 ; python_version < '3.11' or python_version > '3.12' or (python_version <= '3.12' and python_version >= '3.11') + networkx==3.2.1 # via torch - sympy==1.12 ; python_version < '3.11' or python_version > '3.12' or (python_version <= '3.12' and python_version >= '3.11') + sympy==1.12 # via torch - torch==2.0.0 ; python_version < '3.11' or (python_version <= '3.12' and python_version > '3.12' and python_version >= '3.11') + torch==2.0.0 ; python_version < '3.11' # via # -r requirements.in # example @@ -7237,14 +7237,14 @@ fn universal_disjoint_base_or_local_requirement() -> Result<()> { # via # -r requirements.in # example - torch==2.0.0+cu118 ; python_version <= '3.12' and python_version >= '3.11' + torch==2.0.0+cu118 ; python_version >= '3.11' and python_version <= '3.12' # via # -r requirements.in # example # triton - triton==2.0.0 ; python_version <= '3.12' and python_version >= '3.11' and platform_machine == 'x86_64' and platform_system == 'Linux' + triton==2.0.0 ; python_version >= '3.11' and python_version <= '3.12' and platform_machine == 'x86_64' and platform_system == 'Linux' # via torch - typing-extensions==4.10.0 ; python_version < '3.11' or python_version > '3.12' or (python_version <= '3.12' and python_version >= '3.11') + typing-extensions==4.10.0 # via torch ----- stderr ----- @@ -7453,7 +7453,7 @@ fn universal_nested_disjoint_local_requirement() -> Result<()> { # via triton . ; os_name == 'Linux' # via -r requirements.in - filelock==3.13.1 ; os_name != 'Linux' or (os_name == 'Linux' and platform_machine == 'x86_64') or (os_name == 'Linux' and platform_machine == 'x86_64' and platform_system == 'Linux') or (os_name == 'Linux' and platform_machine != 'x86_64') + filelock==3.13.1 # via # torch # triton @@ -7461,17 +7461,17 @@ fn universal_nested_disjoint_local_requirement() -> Result<()> { # via torch intel-openmp==2021.4.0 ; os_name != 'Linux' and platform_system == 'Windows' # via mkl - jinja2==3.1.3 ; os_name != 'Linux' or (os_name == 'Linux' and platform_machine == 'x86_64') or (os_name == 'Linux' and platform_machine != 'x86_64') + jinja2==3.1.3 # via torch lit==18.1.2 ; os_name == 'Linux' and platform_machine == 'x86_64' and platform_system == 'Linux' # via triton - markupsafe==2.1.5 ; os_name != 'Linux' or (os_name == 'Linux' and platform_machine == 'x86_64') or (os_name == 'Linux' and platform_machine != 'x86_64') + markupsafe==2.1.5 # via jinja2 mkl==2021.4.0 ; os_name != 'Linux' and platform_system == 'Windows' # via torch - mpmath==1.3.0 ; os_name != 'Linux' or (os_name == 'Linux' and platform_machine == 'x86_64') or (os_name == 'Linux' and platform_machine != 'x86_64') + mpmath==1.3.0 # via sympy - networkx==3.2.1 ; os_name != 'Linux' or (os_name == 'Linux' and platform_machine == 'x86_64') or (os_name == 'Linux' and platform_machine != 'x86_64') + networkx==3.2.1 # via torch nvidia-cublas-cu12==12.1.3.1 ; os_name != 'Linux' and platform_machine == 'x86_64' and platform_system == 'Linux' # via @@ -7504,7 +7504,7 @@ fn universal_nested_disjoint_local_requirement() -> Result<()> { # nvidia-cusparse-cu12 nvidia-nvtx-cu12==12.1.105 ; os_name != 'Linux' and platform_machine == 'x86_64' and platform_system == 'Linux' # via torch - sympy==1.12 ; os_name != 'Linux' or (os_name == 'Linux' and platform_machine == 'x86_64') or (os_name == 'Linux' and platform_machine != 'x86_64') + sympy==1.12 # via torch tbb==2021.11.0 ; os_name != 'Linux' and platform_system == 'Windows' # via mkl @@ -7521,7 +7521,7 @@ fn universal_nested_disjoint_local_requirement() -> Result<()> { # via -r requirements.in triton==2.0.0 ; os_name == 'Linux' and platform_machine == 'x86_64' and platform_system == 'Linux' # via torch - typing-extensions==4.10.0 ; os_name != 'Linux' or (os_name == 'Linux' and platform_machine == 'x86_64') or (os_name == 'Linux' and platform_machine != 'x86_64') + typing-extensions==4.10.0 # via torch ----- stderr ----- @@ -7753,7 +7753,7 @@ fn universal_transitive_disjoint_prerelease_requirement() -> Result<()> { ----- stdout ----- # This file was autogenerated by uv via the following command: # uv pip compile --cache-dir [CACHE_DIR] requirements.in --universal - cffi==1.16.0 ; os_name == 'linux' or platform_python_implementation != 'PyPy' + cffi==1.16.0 ; platform_python_implementation != 'PyPy' or os_name == 'linux' # via # -r requirements.in # cryptography @@ -8141,31 +8141,31 @@ fn universal_prefer_upper_bounds() -> Result<()> { ----- stdout ----- # This file was autogenerated by uv via the following command: # uv pip compile --cache-dir [CACHE_DIR] requirements.in -p 3.8 --universal - astroid==2.15.8 ; (python_version < '3.11' and sys_platform == 'darwin') or (python_version < '3.11' and sys_platform != 'darwin') or (python_version >= '3.11' and sys_platform == 'darwin') or (python_version >= '3.11' and sys_platform != 'darwin') + astroid==2.15.8 # via pylint colorama==0.4.6 ; sys_platform == 'win32' # via pylint dill==0.3.8 # via pylint - isort==5.13.2 ; (python_version < '3.11' and sys_platform == 'darwin') or (python_version < '3.11' and sys_platform != 'darwin') or (python_version >= '3.11' and sys_platform == 'darwin') or (python_version >= '3.11' and sys_platform != 'darwin') + isort==5.13.2 # via pylint - lazy-object-proxy==1.10.0 ; (python_version < '3.11' and sys_platform == 'darwin') or (python_version < '3.11' and sys_platform != 'darwin') or (python_version >= '3.11' and sys_platform == 'darwin') or (python_version >= '3.11' and sys_platform != 'darwin') + lazy-object-proxy==1.10.0 # via astroid - mccabe==0.7.0 ; (python_version < '3.11' and sys_platform == 'darwin') or (python_version < '3.11' and sys_platform != 'darwin') or (python_version >= '3.11' and sys_platform == 'darwin') or (python_version >= '3.11' and sys_platform != 'darwin') + mccabe==0.7.0 # via pylint - platformdirs==4.2.0 ; (python_version < '3.11' and sys_platform == 'darwin') or (python_version < '3.11' and sys_platform != 'darwin') or (python_version >= '3.11' and sys_platform == 'darwin') or (python_version >= '3.11' and sys_platform != 'darwin') + platformdirs==4.2.0 # via pylint pylint==2.17.7 # via -r requirements.in tomli==2.0.1 ; python_version < '3.11' # via pylint - tomlkit==0.12.4 ; (python_version < '3.11' and sys_platform == 'darwin') or (python_version < '3.11' and sys_platform != 'darwin') or (python_version >= '3.11' and sys_platform == 'darwin') or (python_version >= '3.11' and sys_platform != 'darwin') + tomlkit==0.12.4 # via pylint typing-extensions==4.10.0 ; python_version < '3.11' # via # astroid # pylint - wrapt==1.16.0 ; (python_version < '3.11' and sys_platform == 'darwin') or (python_version < '3.11' and sys_platform != 'darwin') or (python_version >= '3.11' and sys_platform == 'darwin') or (python_version >= '3.11' and sys_platform != 'darwin') + wrapt==1.16.0 # via astroid ----- stderr ----- From d91850aec8f29a2b9f11cfa6abba813a7065e77b Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Wed, 7 Aug 2024 21:51:40 -0400 Subject: [PATCH 02/13] simplify marker expressions when deserializing lockfile --- crates/uv-resolver/Cargo.toml | 1 + crates/uv-resolver/src/lock.rs | 48 ++++++++++++++++++++++++++ crates/uv/src/commands/project/lock.rs | 2 +- 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/crates/uv-resolver/Cargo.toml b/crates/uv-resolver/Cargo.toml index d67b9d24981d..ff775e2803a4 100644 --- a/crates/uv-resolver/Cargo.toml +++ b/crates/uv-resolver/Cargo.toml @@ -56,6 +56,7 @@ textwrap = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } tokio-stream = { workspace = true } +toml = { workspace = true } toml_edit = { workspace = true } tracing = { workspace = true } url = { workspace = true } diff --git a/crates/uv-resolver/src/lock.rs b/crates/uv-resolver/src/lock.rs index a5ceb99a3424..ea1ee264bc1d 100644 --- a/crates/uv-resolver/src/lock.rs +++ b/crates/uv-resolver/src/lock.rs @@ -10,6 +10,7 @@ use std::sync::Arc; use either::Either; use itertools::Itertools; use petgraph::visit::EdgeRef; +use pubgrub::Range; use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet}; use toml_edit::{value, Array, ArrayOfTables, InlineTable, Item, Table, Value}; use url::Url; @@ -77,6 +78,53 @@ pub struct Lock { } impl Lock { + /// Deserialize the [`Lock`] from a TOML string. + pub fn from_toml(s: &str) -> Result { + let mut lock: Lock = toml::from_str(s)?; + + // Simplify all marker expressions based on the requires-python bound. + // + // This is necessary to ensure the a `Lock` deserialized from a lockfile compares + // equally to a newly created `Lock`. + // TODO(ibraheem): we should only simplify python versions when serializing or ensure + // the requires-python bound is enforced on construction to avoid this step. + if let Some(requires_python) = &lock.requires_python { + let python_version = Range::from(requires_python.bound_major_minor().clone()); + let python_full_version = Range::from(requires_python.bound().clone()); + + for package in &mut lock.packages { + for dep in &mut package.dependencies { + if let Some(marker) = &mut dep.marker { + *marker = marker.clone().simplify_python_versions( + python_version.clone(), + python_full_version.clone(), + ); + } + } + + for dep in package.optional_dependencies.values_mut().flatten() { + if let Some(marker) = &mut dep.marker { + *marker = marker.clone().simplify_python_versions( + python_version.clone(), + python_full_version.clone(), + ); + } + } + + for dep in package.dev_dependencies.values_mut().flatten() { + if let Some(marker) = &mut dep.marker { + *marker = marker.clone().simplify_python_versions( + python_version.clone(), + python_full_version.clone(), + ); + } + } + } + } + + Ok(lock) + } + /// Initialize a [`Lock`] from a [`ResolutionGraph`]. pub fn from_resolution_graph(graph: &ResolutionGraph) -> Result { let mut locked_dists = BTreeMap::new(); diff --git a/crates/uv/src/commands/project/lock.rs b/crates/uv/src/commands/project/lock.rs index 5514b3823b49..db1d0107d0d4 100644 --- a/crates/uv/src/commands/project/lock.rs +++ b/crates/uv/src/commands/project/lock.rs @@ -594,7 +594,7 @@ pub(crate) async fn commit(lock: &Lock, workspace: &Workspace) -> Result<(), Pro /// Returns `Ok(None)` if the lockfile does not exist. pub(crate) async fn read(workspace: &Workspace) -> Result, ProjectError> { match fs_err::tokio::read_to_string(&workspace.install_path().join("uv.lock")).await { - Ok(encoded) => match toml::from_str::(&encoded) { + Ok(encoded) => match Lock::from_toml(&encoded) { Ok(lock) => Ok(Some(lock)), Err(err) => { eprint!("Failed to parse lockfile; ignoring locked requirements: {err}"); From d9608389a10ced503e0a55fa05c2864865a22212 Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Thu, 8 Aug 2024 12:51:14 -0400 Subject: [PATCH 03/13] add tests for NP-complete marker simplification cases --- crates/pep508-rs/src/marker/simplify.rs | 14 +++++++-- crates/pep508-rs/src/marker/tree.rs | 40 ++++++++++++++++++------- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/crates/pep508-rs/src/marker/simplify.rs b/crates/pep508-rs/src/marker/simplify.rs index c34581ceb00b..d3338ed61b10 100644 --- a/crates/pep508-rs/src/marker/simplify.rs +++ b/crates/pep508-rs/src/marker/simplify.rs @@ -193,13 +193,23 @@ fn simplify(dnf: &mut Vec>) { // For example, `A or (not A and B)` can be simplified to `A or B`, // eliminating the `not A` term. if other_clause.iter().all(|term| { + // For the term to be redundant in this clause, the other clause can + // contain the negation of the term but not the term itself. if term == skipped_term { return false; } + if is_negation(term, skipped_term) { + return true; + } // TODO(ibraheem): if we intern variables we could reduce this // from a linear search to an integer `HashSet` lookup - clause.contains(term) || is_negated(term, skipped_term) + clause + .iter() + .position(|x| x == term) + // If the term was already removed from this one, we cannot + // depend on it for further simplification. + .is_some_and(|i| !redundant_terms.contains(&i)) }) { redundant_terms.push(skipped); continue 'term; @@ -308,7 +318,7 @@ where } /// Returns `true` if the LHS is the negation of the RHS, or vice versa. -fn is_negated(left: &MarkerExpression, right: &MarkerExpression) -> bool { +fn is_negation(left: &MarkerExpression, right: &MarkerExpression) -> bool { match left { MarkerExpression::Version { key, specifier } => { let MarkerExpression::Version { diff --git a/crates/pep508-rs/src/marker/tree.rs b/crates/pep508-rs/src/marker/tree.rs index 9f93ec9b215e..1939401a3686 100644 --- a/crates/pep508-rs/src/marker/tree.rs +++ b/crates/pep508-rs/src/marker/tree.rs @@ -2047,6 +2047,36 @@ mod test { #[test] fn test_complex_marker_simplification() { + // This expression should simplify to: + // `(implementation_name == 'pypy' and sys_platform != 'win32') + // or (sys_platform == 'win32' or os_name != 'nt') + // or (implementation != 'pypy' or os_name == 'nt')` + // + // However, simplifying this expression is NP-complete and requires an exponential + // algorithm such as Quine-McCluskey, which is not currently implemented. + assert_simplifies( + "(implementation_name == 'pypy' and sys_platform != 'win32') + or (implementation_name != 'pypy' and sys_platform == 'win32') + or (sys_platform == 'win32' and os_name != 'nt') + or (sys_platform != 'win32' and os_name == 'nt')", + "(os_name != 'nt' and sys_platform == 'win32') \ + or (implementation_name != 'pypy' and os_name == 'nt') \ + or (implementation_name == 'pypy' and os_name != 'nt') \ + or (os_name == 'nt' and sys_platform != 'win32')", + ); + + // This is another case we cannot simplify fully, depending on the variable order. + // The expression is equivalent to `sys_platform == 'x' or (os_name == 'Linux' and platform_system == 'win32')`. + assert_simplifies( + "(os_name == 'Linux' and platform_system == 'win32') + or (os_name == 'Linux' and platform_system == 'win32' and sys_platform == 'a') + or (os_name == 'Linux' and platform_system == 'win32' and sys_platform == 'x') + or (os_name != 'Linux' and platform_system == 'win32' and sys_platform == 'x') + or (os_name == 'Linux' and platform_system != 'win32' and sys_platform == 'x') + or (os_name != 'Linux' and platform_system != 'win32' and sys_platform == 'x')", + "(os_name != 'Linux' and sys_platform == 'x') or (platform_system != 'win32' and sys_platform == 'x') or (os_name == 'Linux' and platform_system == 'win32')" + ); + assert_simplifies("python_version > '3.7'", "python_version > '3.7'"); assert_simplifies( @@ -2063,16 +2093,6 @@ mod test { or (python_version == '3.8' and sys_platform == 'win32')", ); - assert_simplifies( - "(os_name == 'Linux' and platform_system == 'win32') - or (os_name == 'Linux' and platform_system == 'win32' and sys_platform == 'a') - or (os_name == 'Linux' and platform_system == 'win32' and sys_platform == 'x') - or (os_name != 'Linux' and platform_system == 'win32' and sys_platform == 'x') - or (os_name == 'Linux' and platform_system != 'win32' and sys_platform == 'x') - or (os_name != 'Linux' and platform_system != 'win32' and sys_platform == 'x')", - "sys_platform == 'x' or (os_name == 'Linux' and platform_system == 'win32')", - ); - assert_simplifies( "(implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32')", "(implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32')" From c478bde3fbf282ca0232a39ed780cde3524b00a8 Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Thu, 8 Aug 2024 13:14:28 -0400 Subject: [PATCH 04/13] update lockfile snapshots --- crates/uv/tests/lock_scenarios.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/uv/tests/lock_scenarios.rs b/crates/uv/tests/lock_scenarios.rs index 7a3e9486823b..d086e1fa6a7d 100644 --- a/crates/uv/tests/lock_scenarios.rs +++ b/crates/uv/tests/lock_scenarios.rs @@ -3056,8 +3056,8 @@ fn preferences_dependent_forking_bistable() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "sys_platform == 'linux'", "sys_platform != 'linux'", + "sys_platform == 'linux'", ] [[package]] @@ -3434,8 +3434,8 @@ fn preferences_dependent_forking_tristable() -> Result<()> { version = 1 requires-python = ">=3.8" environment-markers = [ - "sys_platform == 'linux'", "sys_platform != 'linux'", + "sys_platform == 'linux'", ] [[package]] @@ -3882,10 +3882,10 @@ fn fork_remaining_universe_partitioning() -> Result<()> { requires-python = ">=3.8" environment-markers = [ "sys_platform == 'windows'", + "sys_platform != 'illumos' and sys_platform != 'windows'", "os_name == 'darwin' and sys_platform == 'illumos'", "os_name == 'linux' and sys_platform == 'illumos'", "os_name != 'darwin' and os_name != 'linux' and sys_platform == 'illumos'", - "sys_platform != 'illumos' and sys_platform != 'windows'", ] [[package]] From 1200ce0b7082b5ad32b0a6241e5a0797bc26f877 Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Thu, 8 Aug 2024 14:55:24 -0400 Subject: [PATCH 05/13] move new `pep508-rs` dependencies to workspace root --- Cargo.toml | 2 ++ crates/pep508-rs/Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 98339258879b..1da88e1e21fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,6 +67,7 @@ async_zip = { git = "https://github.com/charliermarsh/rs-async-zip", rev = "011b axoupdater = { version = "0.7.0", default-features = false } backoff = { version = "0.4.0" } base64 = { version = "0.22.0" } +boxcar = { version = "0.2.5" } cachedir = { version = "0.3.1" } cargo-util = { version = "0.2.8" } chrono = { version = "0.4.31" } @@ -131,6 +132,7 @@ seahash = { version = "4.1.0" } serde = { version = "1.0.197", features = ["derive"] } serde_json = { version = "1.0.114" } sha2 = { version = "0.10.8" } +smallvec = { version = "1.13.2" } syn = { version = "2.0.66" } sys-info = { version = "0.9.1" } target-lexicon = {version = "0.12.14" } diff --git a/crates/pep508-rs/Cargo.toml b/crates/pep508-rs/Cargo.toml index 6d7d7c88a41f..2737e5bcf8ea 100644 --- a/crates/pep508-rs/Cargo.toml +++ b/crates/pep508-rs/Cargo.toml @@ -20,6 +20,7 @@ crate-type = ["cdylib", "rlib"] workspace = true [dependencies] +boxcar = { workspace = true } derivative = { workspace = true } itertools = { workspace = true } indexmap = { workspace = true } @@ -32,6 +33,7 @@ regex = { workspace = true } schemars = { workspace = true, optional = true } serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true, optional = true } +smallvec = { workspace = true } thiserror = { workspace = true } tracing = { workspace = true, optional = true } unicode-width = { workspace = true } @@ -39,8 +41,6 @@ url = { workspace = true, features = ["serde"] } uv-fs = { workspace = true } uv-normalize = { workspace = true } uv-pubgrub = { workspace = true } -boxcar = "0.2.5" -smallvec = "1.13.2" [dev-dependencies] insta = { version = "1.36.1" } From c6ca22a0e48f4f4b89f247c245c6a2852ce31b42 Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Thu, 8 Aug 2024 14:58:30 -0400 Subject: [PATCH 06/13] remove unnecessary `pub` --- crates/pep508-rs/src/marker/algebra.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/pep508-rs/src/marker/algebra.rs b/crates/pep508-rs/src/marker/algebra.rs index 17f948e272ad..3131b8eac614 100644 --- a/crates/pep508-rs/src/marker/algebra.rs +++ b/crates/pep508-rs/src/marker/algebra.rs @@ -64,7 +64,7 @@ pub(crate) static INTERNER: LazyLock = LazyLock::new(Interner::default /// It also allows nodes to cheaply compared. #[derive(Default)] pub(crate) struct Interner { - pub shared: InternerShared, + pub(crate) shared: InternerShared, state: Mutex, } From e961cf7027f99f189a05d2cf31517a636e2f1c49 Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Thu, 8 Aug 2024 14:58:46 -0400 Subject: [PATCH 07/13] document invariants of marker tree `Edges` --- crates/pep508-rs/src/marker/algebra.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/pep508-rs/src/marker/algebra.rs b/crates/pep508-rs/src/marker/algebra.rs index 3131b8eac614..db2a80f9610b 100644 --- a/crates/pep508-rs/src/marker/algebra.rs +++ b/crates/pep508-rs/src/marker/algebra.rs @@ -458,16 +458,16 @@ pub(crate) enum Edges { // The edges of a version variable, representing a disjoint set of ranges that cover // the output space. // - // Note that all ranges are simple, meaning they can be represented by a bounded interval - // without gaps. + // Invariant: All ranges are simple, meaning they can be represented by a bounded + // interval without gaps. Additionally, there are at least two edges in the set. Version { edges: SmallVec<(Range, NodeId)>, }, // The edges of a string variable, representing a disjoint set of ranges that cover // the output space. // - // Note that all ranges are simple, meaning they can be represented by a bounded interval - // without gaps. + // Invariant: All ranges are simple, meaning they can be represented by a bounded + // interval without gaps. Additionally, there are at least two edges in the set. String { edges: SmallVec<(Range, NodeId)>, }, From cdd0482cdae379d1020165b6619ce62ae56994f7 Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Thu, 8 Aug 2024 14:59:51 -0400 Subject: [PATCH 08/13] document marker operation cache semantics --- crates/pep508-rs/src/marker/algebra.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/pep508-rs/src/marker/algebra.rs b/crates/pep508-rs/src/marker/algebra.rs index db2a80f9610b..4c28c2b11d59 100644 --- a/crates/pep508-rs/src/marker/algebra.rs +++ b/crates/pep508-rs/src/marker/algebra.rs @@ -82,7 +82,8 @@ struct InternerState { /// into [`InternerShared`]. unique: FxHashMap, - /// A cache for operations between two nodes. + /// A cache for `AND` operations between two nodes. + /// Note that that `OR` is implemented in terms of `AND`. cache: FxHashMap<(NodeId, NodeId), NodeId>, } From 6a599ff6614237148b8b5a1d249c3004502a3a5e Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Thu, 8 Aug 2024 15:02:51 -0400 Subject: [PATCH 09/13] document invariant of `Edges::apply` --- crates/pep508-rs/src/marker/algebra.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/pep508-rs/src/marker/algebra.rs b/crates/pep508-rs/src/marker/algebra.rs index 4c28c2b11d59..859116df7d89 100644 --- a/crates/pep508-rs/src/marker/algebra.rs +++ b/crates/pep508-rs/src/marker/algebra.rs @@ -472,7 +472,8 @@ pub(crate) enum Edges { String { edges: SmallVec<(Range, NodeId)>, }, - // The edges of a boolean variable, representing `true` or `false` values. + // The edges of a boolean variable, representing the values `true` (the `high` child) + // and `false` (the `low` child). Boolean { high: NodeId, low: NodeId, @@ -496,6 +497,9 @@ impl Edges { } /// Returns the [`Edges`] for a string expression. + /// + /// This function will panic for the `In` and `Contains` marker operators, which + /// should be represented as separate boolean variables. fn from_string(operator: MarkerOperator, value: String) -> Edges { let range: Range = match operator { MarkerOperator::Equal => Range::singleton(value), @@ -558,6 +562,8 @@ impl Edges { } /// Merge two [`Edges`], applying the given function to all disjoint, intersecting edges. + /// + /// Note `self` and `map2` must be of the same [`Edges`] variant. fn apply( &self, parent: NodeId, From efd0fd77a04b932e860cf895018dadc3bcd2992b Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Thu, 8 Aug 2024 15:20:53 -0400 Subject: [PATCH 10/13] document and clean up `Edges::apply` routine --- crates/pep508-rs/src/marker/algebra.rs | 88 ++++++++++++++++++-------- 1 file changed, 63 insertions(+), 25 deletions(-) diff --git a/crates/pep508-rs/src/marker/algebra.rs b/crates/pep508-rs/src/marker/algebra.rs index 859116df7d89..a09b6e5c3c06 100644 --- a/crates/pep508-rs/src/marker/algebra.rs +++ b/crates/pep508-rs/src/marker/algebra.rs @@ -561,52 +561,86 @@ impl Edges { edges } - /// Merge two [`Edges`], applying the given function to all disjoint, intersecting edges. + /// Merge two [`Edges`], applying the given operation (e.g., `AND` or `OR`) to all intersecting edges. /// - /// Note `self` and `map2` must be of the same [`Edges`] variant. + /// For example, given two nodes corresponding to the same boolean variable: + /// ```text + /// left (extra == 'foo'): { true: A, false: B } + /// right (extra == 'foo'): { true: C, false: D } + /// ``` + /// + /// We merge them into a single node by applying the given operation to the matching edges. + /// ```text + /// (extra == 'foo'): { true: (A and C), false: (B and D) } + /// ``` + /// For non-boolean variables, this is more complex. See `apply_ranges` for details. + /// + /// Note that the LHS and RHS must be of the same [`Edges`] variant. fn apply( &self, parent: NodeId, - map2: &Edges, - parent2: NodeId, + right_edges: &Edges, + right_parent: NodeId, mut apply: impl FnMut(NodeId, NodeId) -> NodeId, ) -> Edges { - match (self, map2) { - // Version or string variables, merge the ranges. - (Edges::Version { edges: map }, Edges::Version { edges: map2 }) => Edges::Version { - edges: Edges::apply_ranges(map, parent, map2, parent2, apply), + match (self, right_edges) { + // For version or string variables, we have to split and merge the overlapping ranges. + (Edges::Version { edges }, Edges::Version { edges: right_edges }) => Edges::Version { + edges: Edges::apply_ranges(edges, parent, right_edges, right_parent, apply), }, - (Edges::String { edges: map }, Edges::String { edges: map2 }) => Edges::String { - edges: Edges::apply_ranges(map, parent, map2, parent2, apply), + (Edges::String { edges }, Edges::String { edges: right_edges }) => Edges::String { + edges: Edges::apply_ranges(edges, parent, right_edges, right_parent, apply), }, - // Boolean variables, simply merge the low and high nodes. + // For boolean variables, we simply merge the low and high edges. ( Edges::Boolean { high, low }, Edges::Boolean { - high: high2, - low: low2, + high: right_high, + low: right_low, }, ) => Edges::Boolean { - high: apply(high.negate(parent), high2.negate(parent)), - low: apply(low.negate(parent), low2.negate(parent)), + high: apply(high.negate(parent), right_high.negate(parent)), + low: apply(low.negate(parent), right_low.negate(parent)), }, - _ => unreachable!(), + _ => unreachable!("cannot apply two `Edges` of different types"), } } - /// Merge two range maps, applying the given function to all disjoint, intersecting ranges. + /// Merge two range maps, applying the given operation to all disjoint, intersecting ranges. + /// + /// For example, two nodes might have the following edges: + /// ```text + /// left (python_version): { [0, 3.4): A, [3.4, 3.4]: B, (3.4, inf): C } + /// right (python_version): { [0, 3.6): D, [3.6, 3.6]: E, (3.6, inf): F } + /// ``` + /// + /// Unlike with boolean variables, we can't simply apply the operation the static `true` + /// and `false` edges. Instead, we have to split and merge overlapping ranges: + /// ```text + /// python_version: { + /// [0, 3.4): (A and D), + /// [3.4, 3.4]: (B and D), + /// (3.4, 3.6): (C and D), + /// [3.6, 3.6]: (C and E), + /// (3.6, inf): (C and F) + /// } + /// ``` + /// + /// The left and right edges may also have a restricted range from calls to `restrict_versions`. + /// In that case, we drop any ranges that do not exist in the domain of both edges. Note that + /// this should not occur in practice because `requires-python` bounds are global. fn apply_ranges( - map: &SmallVec<(Range, NodeId)>, - parent: NodeId, - map2: &SmallVec<(Range, NodeId)>, - parent2: NodeId, + left_edges: &SmallVec<(Range, NodeId)>, + left_parent: NodeId, + right_edges: &SmallVec<(Range, NodeId)>, + right_parent: NodeId, mut apply: impl FnMut(NodeId, NodeId) -> NodeId, ) -> SmallVec<(Range, NodeId)> where T: Clone + Ord, { let mut combined = SmallVec::new(); - for (range, node) in map { + for (left_range, left_child) in left_edges { // Split the two maps into a set of disjoint and overlapping ranges, merging the // intersections. // @@ -614,15 +648,19 @@ impl Edges { // a bit more complicated despite the ranges being sorted. We cannot simply zip both // sets, as they may contain arbitrary gaps. Instead, we use a quadratic search for // simplicity as the set of ranges for a given variable is typically very small. - for (range2, node2) in map2 { - let intersection = range2.intersection(range); + for (right_range, right_child) in right_edges { + let intersection = right_range.intersection(left_range); if intersection.is_empty() { // TODO(ibraheem): take advantage of the sorted ranges to `break` early continue; } // Merge the intersection. - let node = apply(node.negate(parent), node2.negate(parent2)); + let node = apply( + left_child.negate(left_parent), + right_child.negate(right_parent), + ); + match combined.last_mut() { // Combine ranges if possible. Some((range, prev)) if *prev == node && can_conjoin(range, &intersection) => { From 1d7fc0707c65436bcbec7181ac2cc172236aff11 Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Thu, 8 Aug 2024 15:21:58 -0400 Subject: [PATCH 11/13] clarify depth-first traversal in `collect_dnf` --- crates/pep508-rs/src/marker/simplify.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/pep508-rs/src/marker/simplify.rs b/crates/pep508-rs/src/marker/simplify.rs index d3338ed61b10..c0151fa4bc1b 100644 --- a/crates/pep508-rs/src/marker/simplify.rs +++ b/crates/pep508-rs/src/marker/simplify.rs @@ -19,8 +19,10 @@ pub(crate) fn to_dnf(tree: &MarkerTree) -> Vec> { /// Walk a [`MarkerTree`] recursively and construct a DNF expression. /// -/// A decision diagram can be converted to DNF form by walking the tree and collecting -/// all paths to a `true` terminal node. +/// A decision diagram can be converted to DNF form by performing a depth-first traversal of +/// the tree and collecting all paths to a `true` terminal node. +/// +/// `path` is the list of marker expressions traversed on the current path. fn collect_dnf( tree: &MarkerTree, dnf: &mut Vec>, From cccbe8cb759b85d692865c963246b7fb33c19fcb Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Fri, 9 Aug 2024 11:08:14 -0400 Subject: [PATCH 12/13] apply suggestions from code review --- crates/pep508-rs/src/marker/algebra.rs | 74 +++++++++++++++---------- crates/pep508-rs/src/marker/simplify.rs | 25 ++++++--- 2 files changed, 61 insertions(+), 38 deletions(-) diff --git a/crates/pep508-rs/src/marker/algebra.rs b/crates/pep508-rs/src/marker/algebra.rs index a09b6e5c3c06..c08497243b18 100644 --- a/crates/pep508-rs/src/marker/algebra.rs +++ b/crates/pep508-rs/src/marker/algebra.rs @@ -21,7 +21,7 @@ //! Specifically, a marker tree is represented as a Reduced Ordered ADD. An ADD is ordered if //! different variables appear in the same order on all paths from the root. Additionally, an ADD //! is reduced if: -//! - Isomoprhic nodes are merged. +//! - Isomorphic nodes are merged. //! - Nodes with isomorphic children are eliminated. //! //! These two rules provide an important guarantee for marker trees: marker trees are canonical for @@ -37,7 +37,13 @@ //! //! Additionally, the implementation in this module uses complemented edges, meaning a marker tree and //! it's complement are represented by the same node internally. This allows cheap constant-time marker -//! tree negation. +//! tree negation. It also allows us to only implement a single operation for both `AND` and `OR`, implementing +//! the other in terms of its DeMorgan Complement. +//! +//! ADDs are created and managed through the global [`Interner`]. A given ADD is referenced through +//! a [`NodeId`], which represents a potentially complemented reference to a [`Node`] in the interner, +//! or a terminal `true`/`false` node. Interning allows the reduction rule that isomorphic nodes are +//! merged to be applied globally. use std::cmp::Ordering; use std::fmt; use std::ops::Bound; @@ -60,7 +66,7 @@ pub(crate) static INTERNER: LazyLock = LazyLock::new(Interner::default /// An interner for decision nodes. /// -/// Interning decision nodes allows isomoprhic nodes to be automatically merged. +/// Interning decision nodes allows isomorphic nodes to be automatically merged. /// It also allows nodes to cheaply compared. #[derive(Default)] pub(crate) struct Interner { @@ -83,7 +89,7 @@ struct InternerState { unique: FxHashMap, /// A cache for `AND` operations between two nodes. - /// Note that that `OR` is implemented in terms of `AND`. + /// Note that `OR` is implemented in terms of `AND`. cache: FxHashMap<(NodeId, NodeId), NodeId>, } @@ -164,7 +170,7 @@ impl InternerGuard<'_> { // // Note that in the presence of the `in` operator, we may not be able to simplify // some marker trees to a constant `true` or `false`. For example, it is not trivial to - // detect that `os_name < 'z' and os_name in 'Linux'` is unsatisfiable. + // detect that `os_name > 'z' and os_name in 'Linux'` is unsatisfiable. MarkerExpression::String { key, operator: MarkerOperator::In, @@ -274,7 +280,9 @@ impl InternerGuard<'_> { // Restrict the output of a given boolean variable in the tree. // - // This allows a tree to be simplified if a variable is known to be `true`. + // If the provided function `f` returns a `Some` boolean value, the tree will be simplified + // with the assumption that the given variable is restricted to that value. If the function + // returns `None`, the variable will not be affected. pub(crate) fn restrict(&mut self, i: NodeId, f: &impl Fn(&Variable) -> Option) -> NodeId { if matches!(i, NodeId::TRUE | NodeId::FALSE) { return i; @@ -297,8 +305,9 @@ impl InternerGuard<'_> { // Restrict the output of a given version variable in the tree. // - // This allows the tree to be simplified if a variable is known to be restricted to a - // particular range of outputs. + // If the provided function `f` returns a `Some` range, the tree will be simplified with + // the assumption that the given variable is restricted to values in that range. If the function + // returns `None`, the variable will not be affected. pub(crate) fn restrict_versions( &mut self, i: NodeId, @@ -392,7 +401,7 @@ impl Node { } } -/// An ID representing a unique decision node. +/// An ID representing a reference to a decision node in the [`Interner`]. /// /// The lowest bit of the ID is used represent complemented edges. #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -407,31 +416,26 @@ impl NodeId { /// Create a new, optionally complemented, [`NodeId`] with the given index. fn new(index: usize, complement: bool) -> NodeId { - NodeId(((index + 1) << 1) | usize::from(complement)) + // Ensure the index does not interfere with the lowest complement bit. + let index = (index + 1) << 1; + NodeId(index | usize::from(complement)) } /// Returns the index of this ID, ignoring the complemented edge. fn index(self) -> usize { + // Ignore the lowest bit and bring indices back to starting at `0`. (self.0 >> 1) - 1 } /// Returns `true` if this ID represents a complemented edge. fn is_complement(self) -> bool { + // Whether the lowest bit is set. (self.0 & 1) == 1 } - /// Returns `true` if this node represents an unsatisfiable node. - pub(crate) fn is_false(self) -> bool { - self == NodeId::FALSE - } - - /// Returns `true` if this node represents a trivially `true` node. - pub(crate) fn is_true(self) -> bool { - self == NodeId::TRUE - } - /// Returns the complement of this node. pub(crate) fn not(self) -> NodeId { + // Toggle the lowest bit. NodeId(self.0 ^ 1) } @@ -446,6 +450,16 @@ impl NodeId { self } } + + /// Returns `true` if this node represents an unsatisfiable node. + pub(crate) fn is_false(self) -> bool { + self == NodeId::FALSE + } + + /// Returns `true` if this node represents a trivially `true` node. + pub(crate) fn is_true(self) -> bool { + self == NodeId::TRUE + } } /// A [`SmallVec`] with enough elements to hold two constant edges, as well as the @@ -519,14 +533,16 @@ impl Edges { /// Returns the [`Edges`] for a version specifier. fn from_specifier(specifier: &VersionSpecifier) -> Edges { - // The decision diagram relies on the assumption that the negation of a marker tree - // is the complement of the marker space. However, pre-release versions violate - // this assumption. For example, the marker `python_version > '3.9' or python_version <= '3.9'` - // does not match `python_version == 3.9.0a0`. However, it's negation, - // `python_version > '3.9' and python_version <= '3.9'` also does not include `3.9.0a0`, and is - // actually `false`. + // The decision diagram relies on the assumption that the negation of a marker tree is + // the complement of the marker space. However, pre-release versions violate this assumption. + // For example, the marker `python_full_version > '3.9' or python_full_version <= '3.9'` + // does not match `python_full_version == 3.9.0a0`. However, it's negation, + // `python_full_version > '3.9' and python_full_version <= '3.9'` also does not include + // `3.9.0a0`, and is actually `false`. // - // For this reason we ignore pre-release versions completely when evaluating markers. + // For this reason we ignore pre-release versions entirely when evaluating markers. + // Note that `python_version` cannot take on pre-release values so this is necessary for + // simplifying ranges, but for `python_full_version` this decision is a semantic change. let specifier = PubGrubSpecifier::from_release_specifier(specifier).unwrap(); Edges::Version { edges: Edges::from_range(&specifier.into()), @@ -538,8 +554,6 @@ impl Edges { where T: Ord + Clone, { - let complement = range.complement(); - let mut edges = SmallVec::new(); // Add the `true` edges. @@ -549,7 +563,7 @@ impl Edges { } // Add the `false` edges. - for (start, end) in complement.iter() { + for (start, end) in range.complement().iter() { let range = Range::from_range_bounds((start.clone(), end.clone())); edges.push((range, NodeId::FALSE)); } diff --git a/crates/pep508-rs/src/marker/simplify.rs b/crates/pep508-rs/src/marker/simplify.rs index c0151fa4bc1b..2a0896eeb81e 100644 --- a/crates/pep508-rs/src/marker/simplify.rs +++ b/crates/pep508-rs/src/marker/simplify.rs @@ -10,6 +10,13 @@ use rustc_hash::FxBuildHasher; use crate::{ExtraOperator, MarkerExpression, MarkerOperator, MarkerTree, MarkerTreeKind}; /// Returns a simplified DNF expression for a given marker tree. +/// +/// Marker trees are represented as decision diagrams that cannot be directly serialized to. +/// a boolean expression. Instead, you must traverse and collect all possible solutions to the +/// diagram, which can be used to create a DNF expression, or all non-solutions to the diagram, +/// which can be used to create a CNF expression. +/// +/// We choose DNF as it is easier to simplify for user-facing output. pub(crate) fn to_dnf(tree: &MarkerTree) -> Vec> { let mut dnf = Vec::new(); collect_dnf(tree, &mut dnf, &mut Vec::new()); @@ -162,19 +169,21 @@ fn collect_dnf( /// Simplifies a DNF expression. /// -/// A decision tree is canonical, but only for a given variable order. Depending on the +/// A decision diagram is canonical, but only for a given variable order. Depending on the /// pre-defined order, the DNF expression produced by a decision tree can still be further /// simplified. /// -/// Completely simplifying a DNF expression is NP-hard and amounts to the set cover -/// problem. Additionally, marker expressions can contain complex expressions involving -/// version ranges that are not trivial to simplify. +/// For example, the decision diagram for the expression `A or B` will be represented as +/// `A or (not A and B)` or `B or (not B and A)`, depending on the variable order. In both +/// cases, the negation in the second clause is redundant. /// -/// Instead, we choose to simplify at the boolean variable level without any truth table -/// expansion. Combined with the normalization applied by decision trees, this seems to be -/// sufficient in practice. +/// Completely simplifying a DNF expression is NP-hard and amounts to the set cover problem. +/// Additionally, marker expressions can contain complex expressions involving version ranges +/// that are not trivial to simplify. Instead, we choose to simplify at the boolean variable +/// level without any truth table expansion. Combined with the normalization applied by decision +/// trees, this seems to be sufficient in practice. /// -/// Note: This function has quadratic time complexity. However, it not applied on every marker +/// Note: This function has quadratic time complexity. However, it is not applied on every marker /// operation, only to user facing output, which are typically very simple. fn simplify(dnf: &mut Vec>) { for i in 0..dnf.len() { From 2f06d72692bbd326ff550693cb042d0bd4eecacc Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Fri, 9 Aug 2024 11:38:53 -0400 Subject: [PATCH 13/13] typos --- crates/pep508-rs/src/marker/algebra.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/pep508-rs/src/marker/algebra.rs b/crates/pep508-rs/src/marker/algebra.rs index c08497243b18..929c2b3813a7 100644 --- a/crates/pep508-rs/src/marker/algebra.rs +++ b/crates/pep508-rs/src/marker/algebra.rs @@ -38,7 +38,7 @@ //! Additionally, the implementation in this module uses complemented edges, meaning a marker tree and //! it's complement are represented by the same node internally. This allows cheap constant-time marker //! tree negation. It also allows us to only implement a single operation for both `AND` and `OR`, implementing -//! the other in terms of its DeMorgan Complement. +//! the other in terms of its De Morgan Complement. //! //! ADDs are created and managed through the global [`Interner`]. A given ADD is referenced through //! a [`NodeId`], which represents a potentially complemented reference to a [`Node`] in the interner, @@ -215,7 +215,7 @@ impl InternerGuard<'_> { // Returns a decision node representing the disjunction of two nodes. pub(crate) fn or(&mut self, x: NodeId, y: NodeId) -> NodeId { // We take advantage of cheap negation here and implement OR in terms - // of it's DeMorgan complement. + // of it's De Morgan complement. self.and(x.not(), y.not()).not() }