Skip to content

Commit

Permalink
Patch sysconfig data at install time (#9857)
Browse files Browse the repository at this point in the history
## Summary

This PR reimplements
[`sysconfigpatcher`](https://github.com/bluss/sysconfigpatcher) in Rust
and applies it to our Python installations at install-time, ensuring
that the `sysconfig` data is more likely to be correct.

For now, we only rewrite prefixes (i.e., any path that starts with
`/install` gets rewritten to the correct absolute path for the current
machine).

Unlike `sysconfigpatcher`, this PR does not yet do any of the following:

- Patch `pkginfo` files.
- Change `clang` references to `cc`.

A few things that we should do as follow-ups, in my opinion:

1. Rewrite
[`AR`](https://github.com/bluss/sysconfigpatcher/blob/c1ebf8ab9274dcde255484d93ce0f1fd1f76a248/src/sysconfigpatcher.py#L61).
2. Remove `-isysroot`, which we already do for newer builds.
  • Loading branch information
charliermarsh authored Dec 13, 2024
1 parent 5903ce5 commit d2fb4c5
Show file tree
Hide file tree
Showing 13 changed files with 817 additions and 19 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/uv-python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ windows-result = { workspace = true }
anyhow = { version = "1.0.89" }
assert_fs = { version = "1.1.2" }
indoc = { workspace = true }
insta = { version = "1.40.0" }
itertools = { version = "0.13.0" }
temp-env = { version = "0.3.6" }
tempfile = { workspace = true }
Expand Down
5 changes: 4 additions & 1 deletion crates/uv-python/python/get_interpreter_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,10 @@ def main() -> None:
"sys_executable": sys.executable,
"sys_path": sys.path,
"stdlib": sysconfig.get_path("stdlib"),
"sysconfig_prefix": sysconfig.get_config_var("prefix"),
# Prior to the introduction of `sysconfig` patching, python-build-standalone installations would always use
# "/install" as the prefix. With `sysconfig` patching, we rewrite the prefix to match the actual installation
# location. So in newer versions, we also write a dedicated flag to indicate standalone builds.
"standalone": sysconfig.get_config_var("prefix") == "/install" or bool(sysconfig.get_config_var("PYTHON_BUILD_STANDALONE")),
"scheme": get_scheme(),
"virtualenv": get_virtualenv(),
"platform": os_and_arch,
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-python/src/discovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2518,7 +2518,7 @@ fn disjunction(items: &[&str]) -> String {
fn try_into_u8_slice(release: &[u64]) -> Result<Vec<u8>, std::num::TryFromIntError> {
release
.iter()
.map(|x| match (*x).try_into() {
.map(|x| match u8::try_from(*x) {
Ok(x) => Ok(x),
Err(e) => Err(e),
})
Expand All @@ -2527,7 +2527,7 @@ fn try_into_u8_slice(release: &[u64]) -> Result<Vec<u8>, std::num::TryFromIntErr

/// Convert a wheel tag formatted version (e.g., `38`) to multiple components (e.g., `3.8`).
///
/// The major version is always assumed to be a single digit 0-9. The minor version is all of
/// The major version is always assumed to be a single digit 0-9. The minor version is all
/// the following content.
///
/// If not a wheel tag formatted version, the input is returned unchanged.
Expand Down
1 change: 1 addition & 0 deletions crates/uv-python/src/installation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ impl PythonInstallation {

let installed = ManagedPythonInstallation::new(path)?;
installed.ensure_externally_managed()?;
installed.ensure_sysconfig_patched()?;
installed.ensure_canonical_executables()?;

Ok(Self {
Expand Down
15 changes: 5 additions & 10 deletions crates/uv-python/src/interpreter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub struct Interpreter {
sys_executable: PathBuf,
sys_path: Vec<PathBuf>,
stdlib: PathBuf,
sysconfig_prefix: Option<PathBuf>,
standalone: bool,
tags: OnceLock<Tags>,
target: Option<Target>,
prefix: Option<Prefix>,
Expand Down Expand Up @@ -80,7 +80,7 @@ impl Interpreter {
sys_executable: info.sys_executable,
sys_path: info.sys_path,
stdlib: info.stdlib,
sysconfig_prefix: info.sysconfig_prefix,
standalone: info.standalone,
tags: OnceLock::new(),
target: None,
prefix: None,
Expand Down Expand Up @@ -368,11 +368,6 @@ impl Interpreter {
&self.stdlib
}

/// Return the `prefix` path for this Python interpreter, as returned by `sysconfig.get_config_var("prefix")`.
pub fn sysconfig_prefix(&self) -> Option<&Path> {
self.sysconfig_prefix.as_deref()
}

/// Return the `purelib` path for this Python interpreter, as returned by `sysconfig.get_paths()`.
pub fn purelib(&self) -> &Path {
&self.scheme.purelib
Expand Down Expand Up @@ -441,8 +436,7 @@ impl Interpreter {
///
/// See: <https://github.com/indygreg/python-build-standalone/issues/382>
pub fn is_standalone(&self) -> bool {
self.sysconfig_prefix()
.is_some_and(|prefix| prefix == Path::new("/install"))
self.standalone
}

/// Return the [`Layout`] environment used to install wheels into this interpreter.
Expand Down Expand Up @@ -626,7 +620,7 @@ struct InterpreterInfo {
sys_executable: PathBuf,
sys_path: Vec<PathBuf>,
stdlib: PathBuf,
sysconfig_prefix: Option<PathBuf>,
standalone: bool,
pointer_size: PointerSize,
gil_disabled: bool,
}
Expand Down Expand Up @@ -854,6 +848,7 @@ mod tests {
"arch": "x86_64"
},
"manylinux_compatible": false,
"standalone": false,
"markers": {
"implementation_name": "cpython",
"implementation_version": "3.12.0",
Expand Down
4 changes: 3 additions & 1 deletion crates/uv-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub use crate::discovery::{
find_python_installations, EnvironmentPreference, Error as DiscoveryError, PythonDownloads,
PythonNotFound, PythonPreference, PythonRequest, PythonSource, PythonVariant, VersionRequest,
};
pub use crate::environment::{InvalidEnvironment, InvalidEnvironmentKind, PythonEnvironment};
pub use crate::environment::{InvalidEnvironmentKind, PythonEnvironment};
pub use crate::implementation::ImplementationName;
pub use crate::installation::{PythonInstallation, PythonInstallationKey};
pub use crate::interpreter::{Error as InterpreterError, Interpreter};
Expand Down Expand Up @@ -39,6 +39,7 @@ mod prefix;
#[cfg(windows)]
mod py_launcher;
mod python_version;
mod sysconfig;
mod target;
mod version_files;
mod virtualenv;
Expand Down Expand Up @@ -220,6 +221,7 @@ mod tests {
"arch": "x86_64"
},
"manylinux_compatible": true,
"standalone": true,
"markers": {
"implementation_name": "{IMPLEMENTATION}",
"implementation_version": "{FULL_VERSION}",
Expand Down
19 changes: 18 additions & 1 deletion crates/uv-python/src/managed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::libc::LibcDetectionError;
use crate::platform::Error as PlatformError;
use crate::platform::{Arch, Libc, Os};
use crate::python_version::PythonVersion;
use crate::{PythonRequest, PythonVariant};
use crate::{sysconfig, PythonRequest, PythonVariant};
#[derive(Error, Debug)]
pub enum Error {
#[error(transparent)]
Expand All @@ -40,6 +40,8 @@ pub enum Error {
InvalidPythonVersion(String),
#[error(transparent)]
ExtractError(#[from] uv_extract::Error),
#[error(transparent)]
SysconfigError(#[from] sysconfig::Error),
#[error("Failed to copy to: {0}", to.user_display())]
CopyError {
to: PathBuf,
Expand Down Expand Up @@ -491,6 +493,21 @@ impl ManagedPythonInstallation {
Ok(())
}

/// Patch the installation's `sysconfig` data so that it matches the installation path.
///
/// The patch is applied only when compiling for a Unix target and only when this
/// installation's implementation is CPython; otherwise this is a no-op that returns
/// `Ok(())`.
///
/// # Errors
///
/// Propagates any error returned by `sysconfig::update_sysconfig`.
pub fn ensure_sysconfig_patched(&self) -> Result<(), Error> {
    // Nothing to do on non-Unix targets.
    if !cfg!(unix) {
        return Ok(());
    }

    // Only CPython installations are patched.
    if *self.implementation() != ImplementationName::CPython {
        return Ok(());
    }

    let key = &self.key;
    sysconfig::update_sysconfig(self.path(), key.major, key.minor, key.variant.suffix())?;
    Ok(())
}

/// Create a link to the managed Python executable.
///
/// If the file already exists at the target path, an error will be returned.
Expand Down
148 changes: 148 additions & 0 deletions crates/uv-python/src/sysconfig/cursor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
#![allow(dead_code)]

use std::str::Chars;

/// Sentinel character returned by the peeking methods ([`Cursor::first`],
/// [`Cursor::second`]) when there is no more input to look at.
///
/// Because `'\0'` can also appear literally in the input, use [`Cursor::is_eof`] to
/// distinguish a real NUL character from the end of the input.
pub(super) const EOF_CHAR: char = '\0';

/// A cursor represents a pointer in the source code.
///
/// Based on [`rustc`'s `Cursor`](https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/cursor.rs)
#[derive(Clone, Debug)]
pub(super) struct Cursor<'src> {
    /// An iterator over the [`char`]'s of the source code.
    chars: Chars<'src>,

    /// Stores the previous character for debug assertions.
    #[cfg(debug_assertions)]
    prev_char: char,
}

impl<'src> Cursor<'src> {
    /// Creates a cursor positioned at the start of `source`.
    pub(super) fn new(source: &'src str) -> Self {
        Self {
            chars: source.chars(),
            #[cfg(debug_assertions)]
            prev_char: EOF_CHAR,
        }
    }

    /// Returns the previous character. Useful for debug assertions.
    ///
    /// Returns [`EOF_CHAR`] if nothing has been consumed yet.
    #[cfg(debug_assertions)]
    pub(super) const fn previous(&self) -> char {
        self.prev_char
    }

    /// Peeks the next character from the input stream without consuming it.
    /// Returns [`EOF_CHAR`] if the position is past the end of the file.
    pub(super) fn first(&self) -> char {
        self.chars.clone().next().unwrap_or(EOF_CHAR)
    }

    /// Peeks the second character from the input stream without consuming it.
    /// Returns [`EOF_CHAR`] if the position is past the end of the file.
    pub(super) fn second(&self) -> char {
        let mut chars = self.chars.clone();
        chars.next();
        chars.next().unwrap_or(EOF_CHAR)
    }

    /// Returns the remaining text to lex.
    ///
    /// Use `rest().len()` to get the length (in bytes) of the remaining text.
    pub(super) fn rest(&self) -> &'src str {
        self.chars.as_str()
    }

    /// Returns `true` if the cursor is at the end of file.
    pub(super) fn is_eof(&self) -> bool {
        self.chars.as_str().is_empty()
    }

    /// Moves the cursor to the next character, returning the previous character.
    /// Returns [`None`] if there is no next character.
    pub(super) fn bump(&mut self) -> Option<char> {
        let prev = self.chars.next()?;

        #[cfg(debug_assertions)]
        {
            self.prev_char = prev;
        }

        Some(prev)
    }

    /// Consumes the next character if it is equal to `c`.
    ///
    /// Returns `true` if a character was consumed, and `false` otherwise (including at
    /// the end of the input).
    pub(super) fn eat_char(&mut self, c: char) -> bool {
        // Compare against the actual next character instead of `first()`: the latter
        // yields the `EOF_CHAR` sentinel at the end of the input, which would make
        // `eat_char(EOF_CHAR)` report success without consuming anything. This also
        // matches how `eat_char2` and `eat_char3` perform their comparison.
        if self.chars.clone().next() == Some(c) {
            self.bump();
            true
        } else {
            false
        }
    }

    /// Consumes the next two characters if they are equal to `c1` followed by `c2`.
    ///
    /// Returns `true` if both characters were consumed; otherwise nothing is consumed
    /// and `false` is returned.
    pub(super) fn eat_char2(&mut self, c1: char, c2: char) -> bool {
        let mut chars = self.chars.clone();
        if chars.next() == Some(c1) && chars.next() == Some(c2) {
            self.bump();
            self.bump();
            true
        } else {
            false
        }
    }

    /// Consumes the next three characters if they are equal to `c1`, `c2`, `c3` in
    /// that order.
    ///
    /// Returns `true` if all three characters were consumed; otherwise nothing is
    /// consumed and `false` is returned.
    pub(super) fn eat_char3(&mut self, c1: char, c2: char, c3: char) -> bool {
        let mut chars = self.chars.clone();
        if chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3) {
            self.bump();
            self.bump();
            self.bump();
            true
        } else {
            false
        }
    }

    /// Consumes and returns the next character if `predicate` returns `true` for it.
    ///
    /// Returns [`None`] without consuming anything if the predicate rejects the
    /// character or the cursor is at the end of the input. Note that at the end of the
    /// input the predicate is still invoked once, with [`EOF_CHAR`].
    pub(super) fn eat_if<F>(&mut self, mut predicate: F) -> Option<char>
    where
        F: FnMut(char) -> bool,
    {
        if predicate(self.first()) && !self.is_eof() {
            self.bump()
        } else {
            None
        }
    }

    /// Eats symbols while predicate returns true or until the end of file is reached.
    #[inline]
    pub(super) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
        // It was tried making optimized version of this for eg. line comments, but
        // LLVM can inline all of this and compile it down to fast iteration over bytes.
        while predicate(self.first()) && !self.is_eof() {
            self.bump();
        }
    }

    /// Skips the next `count` bytes.
    ///
    /// ## Panics
    /// - If `count` is larger than the remaining bytes in the input stream.
    /// - If `count` indexes into a multi-byte character.
    pub(super) fn skip_bytes(&mut self, count: usize) {
        let remaining = self.chars.as_str();

        #[cfg(debug_assertions)]
        {
            // Only update the previous character if something is actually skipped;
            // skipping zero bytes must not forget the last consumed character.
            if let Some(last) = remaining[..count].chars().next_back() {
                self.prev_char = last;
            }
        }

        self.chars = remaining[count..].chars();
    }

    /// Skips to the end of the input stream.
    pub(super) fn skip_to_end(&mut self) {
        #[cfg(debug_assertions)]
        {
            // Keep `previous()` accurate: the last skipped character becomes the
            // previous character (unchanged if there was nothing left to skip).
            if let Some(last) = self.chars.as_str().chars().next_back() {
                self.prev_char = last;
            }
        }

        self.chars = "".chars();
    }
}
Loading

0 comments on commit d2fb4c5

Please sign in to comment.