From ffd339443c6971633c6c0ba8a30eb8de4f5ca924 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 12 Dec 2024 12:36:46 -0500 Subject: [PATCH] Patch sysconfig --- Cargo.lock | 1 + crates/uv-python/Cargo.toml | 1 + .../uv-python/python/get_interpreter_info.py | 5 +- crates/uv-python/src/discovery.rs | 4 +- crates/uv-python/src/installation.rs | 1 + crates/uv-python/src/interpreter.rs | 15 +- crates/uv-python/src/lib.rs | 4 +- crates/uv-python/src/managed.rs | 19 +- crates/uv-python/src/sysconfig/cursor.rs | 148 +++++++ crates/uv-python/src/sysconfig/mod.rs | 213 +++++++++ crates/uv-python/src/sysconfig/parser.rs | 410 ++++++++++++++++++ crates/uv-virtualenv/src/virtualenv.rs | 14 +- crates/uv/src/commands/python/install.rs | 1 + 13 files changed, 817 insertions(+), 19 deletions(-) create mode 100644 crates/uv-python/src/sysconfig/cursor.rs create mode 100644 crates/uv-python/src/sysconfig/mod.rs create mode 100644 crates/uv-python/src/sysconfig/parser.rs diff --git a/Cargo.lock b/Cargo.lock index d61c8712fd70..2940aca9f285 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5296,6 +5296,7 @@ dependencies = [ "futures", "goblin", "indoc", + "insta", "itertools 0.13.0", "owo-colors", "procfs", diff --git a/crates/uv-python/Cargo.toml b/crates/uv-python/Cargo.toml index 074784f3fb0a..92930aa298ac 100644 --- a/crates/uv-python/Cargo.toml +++ b/crates/uv-python/Cargo.toml @@ -72,6 +72,7 @@ windows-result = { workspace = true } anyhow = { version = "1.0.89" } assert_fs = { version = "1.1.2" } indoc = { workspace = true } +insta = { version = "1.40.0" } itertools = { version = "0.13.0" } temp-env = { version = "0.3.6" } tempfile = { workspace = true } diff --git a/crates/uv-python/python/get_interpreter_info.py b/crates/uv-python/python/get_interpreter_info.py index 92c8cc8a5f21..03358537cc56 100644 --- a/crates/uv-python/python/get_interpreter_info.py +++ b/crates/uv-python/python/get_interpreter_info.py @@ -573,7 +573,10 @@ def main() -> None: "sys_executable": 
sys.executable, "sys_path": sys.path, "stdlib": sysconfig.get_path("stdlib"), - "sysconfig_prefix": sysconfig.get_config_var("prefix"), + # Prior to the introduction of `sysconfig` patching, python-build-standalone installations would always use + # "/install" as the prefix. With `sysconfig` patching, we rewrite the prefix to match the actual installation + # location. So in newer versions, we also write a dedicated flag to indicate standalone builds. + "standalone": sysconfig.get_config_var("prefix") == "/install" or bool(sysconfig.get_config_var("PYTHON_BUILD_STANDALONE")), "scheme": get_scheme(), "virtualenv": get_virtualenv(), "platform": os_and_arch, diff --git a/crates/uv-python/src/discovery.rs b/crates/uv-python/src/discovery.rs index b2a690dd4cae..9b7ce4581052 100644 --- a/crates/uv-python/src/discovery.rs +++ b/crates/uv-python/src/discovery.rs @@ -2518,7 +2518,7 @@ fn disjunction(items: &[&str]) -> String { fn try_into_u8_slice(release: &[u64]) -> Result, std::num::TryFromIntError> { release .iter() - .map(|x| match (*x).try_into() { + .map(|x| match u8::try_from(*x) { Ok(x) => Ok(x), Err(e) => Err(e), }) @@ -2527,7 +2527,7 @@ fn try_into_u8_slice(release: &[u64]) -> Result, std::num::TryFromIntErr /// Convert a wheel tag formatted version (e.g., `38`) to multiple components (e.g., `3.8`). /// -/// The major version is always assumed to be a single digit 0-9. The minor version is all of +/// The major version is always assumed to be a single digit 0-9. The minor version is all /// the following content. /// /// If not a wheel tag formatted version, the input is returned unchanged. 
diff --git a/crates/uv-python/src/installation.rs b/crates/uv-python/src/installation.rs index 31f57ec139ba..a0b6f9349fe9 100644 --- a/crates/uv-python/src/installation.rs +++ b/crates/uv-python/src/installation.rs @@ -163,6 +163,7 @@ impl PythonInstallation { let installed = ManagedPythonInstallation::new(path)?; installed.ensure_externally_managed()?; + installed.ensure_sysconfig_patched()?; installed.ensure_canonical_executables()?; Ok(Self { diff --git a/crates/uv-python/src/interpreter.rs b/crates/uv-python/src/interpreter.rs index 4c9013aece92..cb5ab8fcd993 100644 --- a/crates/uv-python/src/interpreter.rs +++ b/crates/uv-python/src/interpreter.rs @@ -46,7 +46,7 @@ pub struct Interpreter { sys_executable: PathBuf, sys_path: Vec, stdlib: PathBuf, - sysconfig_prefix: Option, + standalone: bool, tags: OnceLock, target: Option, prefix: Option, @@ -80,7 +80,7 @@ impl Interpreter { sys_executable: info.sys_executable, sys_path: info.sys_path, stdlib: info.stdlib, - sysconfig_prefix: info.sysconfig_prefix, + standalone: info.standalone, tags: OnceLock::new(), target: None, prefix: None, @@ -368,11 +368,6 @@ impl Interpreter { &self.stdlib } - /// Return the `prefix` path for this Python interpreter, as returned by `sysconfig.get_config_var("prefix")`. - pub fn sysconfig_prefix(&self) -> Option<&Path> { - self.sysconfig_prefix.as_deref() - } - /// Return the `purelib` path for this Python interpreter, as returned by `sysconfig.get_paths()`. pub fn purelib(&self) -> &Path { &self.scheme.purelib @@ -441,8 +436,7 @@ impl Interpreter { /// /// See: pub fn is_standalone(&self) -> bool { - self.sysconfig_prefix() - .is_some_and(|prefix| prefix == Path::new("/install")) + self.standalone } /// Return the [`Layout`] environment used to install wheels into this interpreter. 
@@ -626,7 +620,7 @@ struct InterpreterInfo { sys_executable: PathBuf, sys_path: Vec, stdlib: PathBuf, - sysconfig_prefix: Option, + standalone: bool, pointer_size: PointerSize, gil_disabled: bool, } @@ -854,6 +848,7 @@ mod tests { "arch": "x86_64" }, "manylinux_compatible": false, + "standalone": false, "markers": { "implementation_name": "cpython", "implementation_version": "3.12.0", diff --git a/crates/uv-python/src/lib.rs b/crates/uv-python/src/lib.rs index 703e8a033fb7..982878ff3d56 100644 --- a/crates/uv-python/src/lib.rs +++ b/crates/uv-python/src/lib.rs @@ -8,7 +8,7 @@ pub use crate::discovery::{ find_python_installations, EnvironmentPreference, Error as DiscoveryError, PythonDownloads, PythonNotFound, PythonPreference, PythonRequest, PythonSource, PythonVariant, VersionRequest, }; -pub use crate::environment::{InvalidEnvironment, InvalidEnvironmentKind, PythonEnvironment}; +pub use crate::environment::{InvalidEnvironmentKind, PythonEnvironment}; pub use crate::implementation::ImplementationName; pub use crate::installation::{PythonInstallation, PythonInstallationKey}; pub use crate::interpreter::{Error as InterpreterError, Interpreter}; @@ -39,6 +39,7 @@ mod prefix; #[cfg(windows)] mod py_launcher; mod python_version; +mod sysconfig; mod target; mod version_files; mod virtualenv; @@ -220,6 +221,7 @@ mod tests { "arch": "x86_64" }, "manylinux_compatible": true, + "standalone": true, "markers": { "implementation_name": "{IMPLEMENTATION}", "implementation_version": "{FULL_VERSION}", diff --git a/crates/uv-python/src/managed.rs b/crates/uv-python/src/managed.rs index 67484ad97a35..5cc45d020bf1 100644 --- a/crates/uv-python/src/managed.rs +++ b/crates/uv-python/src/managed.rs @@ -25,7 +25,7 @@ use crate::libc::LibcDetectionError; use crate::platform::Error as PlatformError; use crate::platform::{Arch, Libc, Os}; use crate::python_version::PythonVersion; -use crate::{PythonRequest, PythonVariant}; +use crate::{sysconfig, PythonRequest, PythonVariant}; 
#[derive(Error, Debug)] pub enum Error { #[error(transparent)] @@ -40,6 +40,8 @@ pub enum Error { InvalidPythonVersion(String), #[error(transparent)] ExtractError(#[from] uv_extract::Error), + #[error(transparent)] + SysconfigError(#[from] sysconfig::Error), #[error("Failed to copy to: {0}", to.user_display())] CopyError { to: PathBuf, @@ -491,6 +493,21 @@ impl ManagedPythonInstallation { Ok(()) } + /// Ensure that the `sysconfig` data is patched to match the installation path. + pub fn ensure_sysconfig_patched(&self) -> Result<(), Error> { + if cfg!(unix) { + if *self.implementation() == ImplementationName::CPython { + sysconfig::update_sysconfig( + self.path(), + self.key.major, + self.key.minor, + self.key.variant.suffix(), + )?; + } + } + Ok(()) + } + /// Create a link to the managed Python executable. /// /// If the file already exists at the target path, an error will be returned. diff --git a/crates/uv-python/src/sysconfig/cursor.rs b/crates/uv-python/src/sysconfig/cursor.rs new file mode 100644 index 000000000000..d6abb9fc4ac2 --- /dev/null +++ b/crates/uv-python/src/sysconfig/cursor.rs @@ -0,0 +1,148 @@ +#![allow(dead_code)] + +use std::str::Chars; + +pub(super) const EOF_CHAR: char = '\0'; + +/// A cursor represents a pointer in the source code. +/// +/// Based on [`rustc`'s `Cursor`](https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/cursor.rs) +#[derive(Clone, Debug)] +pub(super) struct Cursor<'src> { + /// An iterator over the [`char`]'s of the source code. + chars: Chars<'src>, + + /// Stores the previous character for debug assertions. + #[cfg(debug_assertions)] + prev_char: char, +} + +impl<'src> Cursor<'src> { + pub(super) fn new(source: &'src str) -> Self { + Self { + chars: source.chars(), + #[cfg(debug_assertions)] + prev_char: EOF_CHAR, + } + } + + /// Returns the previous character. Useful for debug assertions. 
+ #[cfg(debug_assertions)] + pub(super) const fn previous(&self) -> char { + self.prev_char + } + + /// Peeks the next character from the input stream without consuming it. + /// Returns [`EOF_CHAR`] if the position is past the end of the file. + pub(super) fn first(&self) -> char { + self.chars.clone().next().unwrap_or(EOF_CHAR) + } + + /// Peeks the second character from the input stream without consuming it. + /// Returns [`EOF_CHAR`] if the position is past the end of the file. + pub(super) fn second(&self) -> char { + let mut chars = self.chars.clone(); + chars.next(); + chars.next().unwrap_or(EOF_CHAR) + } + + /// Returns the remaining text to lex. + /// + /// Use [`Cursor::text_len`] to get the length of the remaining text. + pub(super) fn rest(&self) -> &'src str { + self.chars.as_str() + } + + /// Returns `true` if the cursor is at the end of file. + pub(super) fn is_eof(&self) -> bool { + self.chars.as_str().is_empty() + } + + /// Moves the cursor to the next character, returning the previous character. + /// Returns [`None`] if there is no next character. 
+ pub(super) fn bump(&mut self) -> Option { + let prev = self.chars.next()?; + + #[cfg(debug_assertions)] + { + self.prev_char = prev; + } + + Some(prev) + } + + pub(super) fn eat_char(&mut self, c: char) -> bool { + if self.first() == c { + self.bump(); + true + } else { + false + } + } + + pub(super) fn eat_char2(&mut self, c1: char, c2: char) -> bool { + let mut chars = self.chars.clone(); + if chars.next() == Some(c1) && chars.next() == Some(c2) { + self.bump(); + self.bump(); + true + } else { + false + } + } + + pub(super) fn eat_char3(&mut self, c1: char, c2: char, c3: char) -> bool { + let mut chars = self.chars.clone(); + if chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3) { + self.bump(); + self.bump(); + self.bump(); + true + } else { + false + } + } + + pub(super) fn eat_if(&mut self, mut predicate: F) -> Option + where + F: FnMut(char) -> bool, + { + if predicate(self.first()) && !self.is_eof() { + self.bump() + } else { + None + } + } + + /// Eats symbols while predicate returns true or until the end of file is reached. + #[inline] + pub(super) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) { + // It was tried making optimized version of this for eg. line comments, but + // LLVM can inline all of this and compile it down to fast iteration over bytes. + while predicate(self.first()) && !self.is_eof() { + self.bump(); + } + } + + /// Skips the next `count` bytes. + /// + /// ## Panics + /// - If `count` is larger than the remaining bytes in the input stream. + /// - If `count` indexes into a multi-byte character. + pub(super) fn skip_bytes(&mut self, count: usize) { + #[cfg(debug_assertions)] + { + self.prev_char = self.chars.as_str()[..count] + .chars() + .next_back() + .unwrap_or('\0'); + } + + self.chars = self.chars.as_str()[count..].chars(); + } + + /// Skips to the end of the input stream. 
+ pub(super) fn skip_to_end(&mut self) { + self.chars = "".chars(); + } +} diff --git a/crates/uv-python/src/sysconfig/mod.rs b/crates/uv-python/src/sysconfig/mod.rs new file mode 100644 index 000000000000..c67baed80687 --- /dev/null +++ b/crates/uv-python/src/sysconfig/mod.rs @@ -0,0 +1,213 @@ +//! Patch `sysconfig` data in a Python installation. +//! +//! Inspired by: , +//! available under the MIT license: +//! +//! ```text +//! Copyright 2024 Ulrik Sverdrup "bluss" +//! +//! Permission is hereby granted, free of charge, to any person obtaining a copy of +//! this software and associated documentation files (the "Software"), to deal in +//! the Software without restriction, including without limitation the rights to +//! use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +//! the Software, and to permit persons to whom the Software is furnished to do so, +//! subject to the following conditions: +//! +//! The above copyright notice and this permission notice shall be included in all +//! copies or substantial portions of the Software. +//! +//! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +//! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +//! FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +//! COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +//! IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +//! CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +//! ``` + +use std::io::Write; +use std::path::{Path, PathBuf}; +use std::str::FromStr; + +use tracing::trace; + +use crate::sysconfig::parser::{Error as ParseError, SysconfigData, Value}; + +mod cursor; +mod parser; + +/// Update the `sysconfig` data in a Python installation. 
+pub(crate) fn update_sysconfig( + install_root: &Path, + major: u8, + minor: u8, + suffix: &str, +) -> Result<(), Error> { + // Find the `_sysconfigdata_` file in the Python installation. + let real_prefix = std::path::absolute(install_root)?; + let sysconfigdata = find_sysconfigdata(&real_prefix, major, minor, suffix)?; + trace!( + "Discovered `sysconfig` data at: {}", + sysconfigdata.display() + ); + + // Update the `_sysconfigdata_` file in-memory. + let contents = fs_err::read_to_string(&sysconfigdata)?; + let data = SysconfigData::from_str(&contents)?; + let data = patch_sysconfigdata(data, &real_prefix); + let contents = data.to_string_pretty()?; + + // Write the updated `_sysconfigdata_` file. + let mut file = fs_err::OpenOptions::new() + .write(true) + .truncate(true) + .create(true) + .open(&sysconfigdata)?; + file.write_all(contents.as_bytes())?; + file.sync_data()?; + + Ok(()) +} + +/// Find the `_sysconfigdata_` file in a Python installation. +/// +/// For example, on macOS, returns `{real_prefix}/lib/python3.12/_sysconfigdata__darwin_darwin.py"`. +fn find_sysconfigdata( + real_prefix: &Path, + major: u8, + minor: u8, + suffix: &str, +) -> Result { + // Find the `lib` directory in the Python installation. + let lib = real_prefix + .join("lib") + .join(format!("python{major}.{minor}{suffix}")); + if !lib.exists() { + return Err(Error::MissingLib); + } + + // Probe the `lib` directory for `_sysconfigdata_`. + for entry in lib.read_dir()? { + let entry = entry?; + + if entry.path().extension().is_none_or(|ext| ext != "py") { + continue; + } + + if !entry + .path() + .file_stem() + .and_then(|stem| stem.to_str()) + .is_some_and(|stem| stem.starts_with("_sysconfigdata_")) + { + continue; + } + + let metadata = entry.metadata()?; + if metadata.is_symlink() { + continue; + }; + + if metadata.is_file() { + return Ok(entry.path()); + } + } + + Err(Error::MissingSysconfigdata) +} + +/// Patch the given `_sysconfigdata_` contents. 
fn patch_sysconfigdata(mut data: SysconfigData, real_prefix: &Path) -> SysconfigData {
    /// Update the `/install` prefix in a whitespace-separated string.
    ///
    /// A token is rewritten only when it is exactly `/install` or starts with `/install/`;
    /// tokens that merely share the leading characters (e.g., `/installer/bin`) are kept as-is.
    /// The tokens are re-joined with single spaces.
    fn update_prefix(s: &str, real_prefix: &Path) -> String {
        s.split_whitespace()
            .map(|part| match part.strip_prefix("/install") {
                // The token is exactly `/install`.
                Some("") => real_prefix.display().to_string(),
                Some(rest) => match rest.strip_prefix('/') {
                    // The token is `/install/<relative>`: re-root it under the real prefix.
                    Some(relative) => real_prefix.join(relative).display().to_string(),
                    // `/install` is not a full path component here; leave the token alone.
                    None => part.to_string(),
                },
                None => part.to_string(),
            })
            .collect::<Vec<_>>()
            .join(" ")
    }

    // Patch each value, as needed.
    let mut count = 0;
    for (key, value) in data.iter_mut() {
        // Only string values can contain paths.
        let Value::String(value) = value else {
            continue;
        };
        let patched = update_prefix(value, real_prefix);
        if *value != patched {
            trace!("Updated `{key}` from `{value}` to `{patched}`");
            count += 1;
            *value = patched;
        }
    }

    match count {
        0 => trace!("No updates required"),
        1 => trace!("Updated 1 value"),
        n => trace!("Updated {n} values"),
    }

    // Mark the Python installation as standalone.
+ data.insert("PYTHON_BUILD_STANDALONE".to_string(), Value::Int(1)); + + data +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error(transparent)] + Io(#[from] std::io::Error), + #[error("Python installation is missing a `lib` directory")] + MissingLib, + #[error("Python installation is missing a `_sysconfigdata_` file")] + MissingSysconfigdata, + #[error(transparent)] + Parse(#[from] ParseError), + #[error(transparent)] + Json(#[from] serde_json::Error), +} + +#[cfg(test)] +#[cfg(unix)] +mod tests { + use super::*; + + #[test] + fn update_real_prefix() -> Result<(), Error> { + let sysconfigdata = [ + ("BASEMODLIBS", ""), + ("BUILDPYTHON", "python.exe"), + ("prefix", "/install/prefix"), + ("exec_prefix", "/install/exec_prefix"), + ("base", "/install/base"), + ] + .into_iter() + .map(|(k, v)| (k.to_string(), Value::String(v.to_string()))) + .collect::(); + + let real_prefix = Path::new("/real/prefix"); + let data = patch_sysconfigdata(sysconfigdata, real_prefix); + + insta::assert_snapshot!(data.to_string_pretty()?, @r###" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "BASEMODLIBS": "", + "BUILDPYTHON": "python.exe", + "PYTHON_BUILD_STANDALONE": 1, + "base": "/real/prefix/base", + "exec_prefix": "/real/prefix/exec_prefix", + "prefix": "/real/prefix/prefix" + } + "###); + + Ok(()) + } +} diff --git a/crates/uv-python/src/sysconfig/parser.rs b/crates/uv-python/src/sysconfig/parser.rs new file mode 100644 index 000000000000..ceebe4c41b71 --- /dev/null +++ b/crates/uv-python/src/sysconfig/parser.rs @@ -0,0 +1,410 @@ +use std::collections::BTreeMap; +use std::str::FromStr; + +use serde::Serialize; +use serde_json::ser::PrettyFormatter; + +use crate::sysconfig::cursor::{Cursor, EOF_CHAR}; + +/// A value in the [`SysconfigData`] map. +/// +/// Values are assumed to be either strings or integers. 
+#[derive(Debug, Clone, Eq, PartialEq, serde::Serialize)] +#[serde(untagged)] +pub(super) enum Value { + String(String), + Int(i32), +} + +/// The data extracted from a `_sysconfigdata_` file. +#[derive(Debug, Clone, Eq, PartialEq, serde::Serialize)] +pub(super) struct SysconfigData(BTreeMap); + +impl SysconfigData { + /// Returns an iterator over the key-value pairs in the map. + pub(super) fn iter_mut(&mut self) -> std::collections::btree_map::IterMut { + self.0.iter_mut() + } + + /// Inserts a key-value pair into the map. + pub(super) fn insert(&mut self, key: String, value: Value) -> Option { + self.0.insert(key, value) + } + + /// Formats the `sysconfig` data as a pretty-printed string. + pub(super) fn to_string_pretty(&self) -> Result { + let output = { + let mut buf = Vec::new(); + let mut serializer = serde_json::Serializer::with_formatter( + &mut buf, + PrettyFormatter::with_indent(b" "), + ); + self.0.serialize(&mut serializer)?; + String::from_utf8(buf).unwrap() + }; + Ok(format!( + "# system configuration generated and used by the sysconfig module\nbuild_time_vars = {output}\n", + )) + } +} + +impl std::fmt::Display for SysconfigData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let output = { + let mut buf = Vec::new(); + let mut serializer = serde_json::Serializer::new(&mut buf); + self.0.serialize(&mut serializer).unwrap(); + String::from_utf8(buf).unwrap() + }; + write!(f, "{output}",) + } +} + +impl FromIterator<(String, Value)> for SysconfigData { + fn from_iter>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + +/// Parse the `_sysconfigdata_` file (e.g., `{real_prefix}/lib/python3.12/_sysconfigdata__darwin_darwin.py"` +/// on macOS). +/// +/// `_sysconfigdata_` is structured as follows: +/// +/// 1. A comment on the first line (e.g., `# system configuration generated and used by the sysconfig module`). +/// 2. An assignment to `build_time_vars` (e.g., `build_time_vars = { ... }`). 
+/// +/// The right-hand side of the assignment is a JSON object. The keys are strings, and the values +/// are strings or numbers. +impl FromStr for SysconfigData { + type Err = Error; + + fn from_str(s: &str) -> Result { + // Read the first line of the file. + let Some(s) = + s.strip_prefix("# system configuration generated and used by the sysconfig module\n") + else { + return Err(Error::MissingHeader); + }; + + // Read the assignment to `build_time_vars`. + let Some(s) = s.strip_prefix("build_time_vars") else { + return Err(Error::MissingAssignment); + }; + + let mut cursor = Cursor::new(s); + + cursor.eat_while(is_python_whitespace); + if !cursor.eat_char('=') { + return Err(Error::MissingAssignment); + } + cursor.eat_while(is_python_whitespace); + + if !cursor.eat_char('{') { + return Err(Error::MissingOpenBrace); + } + + let mut map = BTreeMap::new(); + loop { + match cursor.first() { + '\'' | '"' => { + // Parse key. + let key = parse_string(&mut cursor)?; + + cursor.eat_while(is_python_whitespace); + cursor.eat_char(':'); + cursor.eat_while(is_python_whitespace); + + // Parse value + let value = match cursor.first() { + '\'' | '"' => Value::String(parse_concatenated_string(&mut cursor)?), + '-' => { + cursor.bump(); + Value::Int(-parse_int(&mut cursor)?) + } + c if c.is_ascii_digit() => Value::Int(parse_int(&mut cursor)?), + c => return Err(Error::UnexpectedCharacter(c)), + }; + + // Insert into map. + map.insert(key, value); + + // Skip optional comma. + cursor.eat_while(is_python_whitespace); + cursor.eat_char(','); + cursor.eat_while(is_python_whitespace); + } + + // Skip whitespace. + ' ' | '\n' | '\r' | '\t' => { + cursor.bump(); + } + + // When we see a closing brace, we're done. + '}' => { + cursor.bump(); + break; + } + + c => return Err(Error::UnexpectedCharacter(c)), + } + } + + Ok(Self(map)) + } +} + +/// Parse a Python string literal. 
+fn parse_string(cursor: &mut Cursor) -> Result { + let quote = cursor.bump().expect("Expected opening quote"); + assert!(quote == '\'' || quote == '"', "Invalid quote character"); + + let mut result = String::new(); + loop { + if cursor.first() == EOF_CHAR { + return Err(Error::UnexpectedCharacter(EOF_CHAR)); + } + + // Handle escaped quotes. + if cursor.first() == '\\' { + // Consume the backslash. + cursor.bump(); + if cursor.first() == quote { + result.push(quote); + cursor.bump(); + continue; + } + + // Keep the backslash and following character. + result.push('\\'); + result.push(cursor.first()); + cursor.bump(); + continue; + } + + // Consume closing quote. + if cursor.first() == quote { + cursor.bump(); + break; + } + + result.push(cursor.first()); + cursor.bump(); + } + Ok(result) +} + +/// Parse a Python string, which may be a concatenation of multiple string literals. +fn parse_concatenated_string(cursor: &mut Cursor) -> Result { + let mut result = String::new(); + loop { + let c = cursor.first(); + if c == EOF_CHAR { + break; + } + if c == '\'' || c == '"' { + // Parse a new string fragment and append it. + result.push_str(&parse_string(cursor)?); + } else if is_python_whitespace(c) { + // Skip whitespace between fragments + cursor.bump(); + } else if c == ',' || c == '}' { + // End of value. + break; + } else { + return Err(Error::UnexpectedCharacter(c)); + } + } + Ok(result) +} + +/// Parse an integer literal. +fn parse_int(cursor: &mut Cursor) -> Result { + let mut result = String::new(); + loop { + let c = cursor.first(); + if c == EOF_CHAR { + break; + } + if !c.is_ascii_digit() { + break; + } + result.push(c); + cursor.bump(); + } + result.parse() +} + +/// Returns `true` for [whitespace](https://docs.python.org/3/reference/lexical_analysis.html#whitespace-between-tokens) +/// characters. 
+const fn is_python_whitespace(c: char) -> bool { + matches!( + c, + // Space, tab, form-feed, newline, or carriage return + ' ' | '\t' | '\x0C' | '\n' | '\r' + ) +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Missing opening brace")] + MissingOpenBrace, + #[error("Unexpected character: {0}")] + UnexpectedCharacter(char), + #[error("Failed to parse integer")] + ParseInt(#[from] std::num::ParseIntError), + #[error("`_sysconfigdata_` is missing a header comment")] + MissingHeader, + #[error("`_sysconfigdata_` is missing an assignment to `build_time_vars`")] + MissingAssignment, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_string() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": "value1", + "key2": 42, + "key3": "multi-part" " string" + } + "# + ); + + let result = input.parse::().expect("Parsing failed"); + let snapshot = result.to_string_pretty().unwrap(); + + insta::assert_snapshot!(snapshot, @r###" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": "value1", + "key2": 42, + "key3": "multi-part string" + } + "###); + } + + #[test] + fn test_parse_integer_values() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": 12345, + "key2": -15 + } + "# + ); + + let result = input.parse::().expect("Parsing failed"); + let snapshot = result.to_string_pretty().unwrap(); + + insta::assert_snapshot!(snapshot, @r###" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": 12345, + "key2": -15 + } + "###); + } + + #[test] + fn test_parse_escaped_quotes() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": "value with \"escaped quotes\"", + "key2": 'single-quoted \'escaped\'' + } + 
"# + ); + + let result = input.parse::().expect("Parsing failed"); + let snapshot = result.to_string_pretty().unwrap(); + + insta::assert_snapshot!(snapshot, @r###" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": "value with \"escaped quotes\"", + "key2": "single-quoted 'escaped'" + } + "###); + } + + #[test] + fn test_parse_concatenated_strings() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": "multi-" + "line " + "string" + } + "# + ); + + let result = input.parse::().expect("Parsing failed"); + let snapshot = result.to_string_pretty().unwrap(); + + insta::assert_snapshot!(snapshot, @r###" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": "multi-line string" + } + "###); + } + + #[test] + fn test_missing_header_error() { + let input = indoc::indoc!( + r#" + build_time_vars = { + "key1": "value1" + } + "# + ); + + let result = input.parse::(); + assert!(matches!(result, Err(Error::MissingHeader))); + } + + #[test] + fn test_missing_assignment_error() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + { + "key1": "value1" + } + "# + ); + + let result = input.parse::(); + assert!(matches!(result, Err(Error::MissingAssignment))); + } + + #[test] + fn test_unexpected_character_error() { + let input = indoc::indoc!( + r#" + # system configuration generated and used by the sysconfig module + build_time_vars = { + "key1": &123 + } + "# + ); + + let result = input.parse::(); + assert!( + result.is_err(), + "Expected parsing to fail due to unexpected character" + ); + } +} diff --git a/crates/uv-virtualenv/src/virtualenv.rs b/crates/uv-virtualenv/src/virtualenv.rs index e3412992b075..89552f477915 100644 --- a/crates/uv-virtualenv/src/virtualenv.rs +++ b/crates/uv-virtualenv/src/virtualenv.rs @@ -76,6 +76,7 @@ pub(crate) fn create( 
base_executable, interpreter.python_major(), interpreter.python_minor(), + interpreter.variant().suffix(), ) { Ok(path) => path, Err(err) => { @@ -654,7 +655,12 @@ fn copy_launcher_windows( /// environments. /// /// See: -fn find_base_python(executable: &Path, major: u8, minor: u8) -> Result { +fn find_base_python( + executable: &Path, + major: u8, + minor: u8, + suffix: &str, +) -> Result { /// Returns `true` if `path` is the root directory. fn is_root(path: &Path) -> bool { let mut components = path.components(); @@ -664,12 +670,12 @@ fn find_base_python(executable: &Path, major: u8, minor: u8) -> Result - fn is_prefix(dir: &Path, major: u8, minor: u8) -> bool { + fn is_prefix(dir: &Path, major: u8, minor: u8, suffix: &str) -> bool { if cfg!(windows) { dir.join("Lib").join("os.py").is_file() } else { dir.join("lib") - .join(format!("python{major}.{minor}")) + .join(format!("python{major}.{minor}{suffix}")) .join("os.py") .is_file() } @@ -685,7 +691,7 @@ fn find_base_python(executable: &Path, major: u8, minor: u8) -> Result