Skip to content

Commit

Permalink
Add an editable index to the site-packages registry (#671)
Browse files Browse the repository at this point in the history
This PR modifies `SitePackages` to store all distributions in a flat
vector, and maintain two indexes (hash maps) from "per-element data for
an element in the vector" to "index of that element". This enables us to
maintain a map on both package name and editable URL.
  • Loading branch information
charliermarsh authored Dec 17, 2023
1 parent 08edd17 commit 00e1c33
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 54 deletions.
12 changes: 12 additions & 0 deletions crates/distribution-types/src/installed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::str::FromStr;

use anyhow::{anyhow, Context, Result};
use fs_err as fs;
use url::Url;

use pep440_rs::Version;
use puffin_normalize::PackageName;
Expand Down Expand Up @@ -140,4 +141,15 @@ impl InstalledDist {
Metadata21::parse(&contents)
.with_context(|| format!("Failed to parse METADATA file at: {}", path.display()))
}

/// Return the [`Url`] of the distribution, if it is editable.
pub fn editable(&self) -> Option<&Url> {
match self {
Self::Url(InstalledDirectUrlDist {
url: DirectUrl::LocalDirectory { url, dir_info },
..
}) if dir_info.editable == Some(true) => Some(url),
_ => None,
}
}
}
7 changes: 6 additions & 1 deletion crates/puffin-cli/src/commands/freeze.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use anyhow::Result;
use distribution_types::Metadata;
use itertools::Itertools;
use tracing::debug;

use platform_host::Platform;
Expand All @@ -21,7 +23,10 @@ pub(crate) fn freeze(cache: &Cache, _printer: Printer) -> Result<ExitStatus> {

// Build the installed index.
let site_packages = SitePackages::from_executable(&python)?;
for dist in site_packages.distributions() {
for dist in site_packages
.iter()
.sorted_unstable_by(|a, b| a.name().cmp(b.name()))
{
#[allow(clippy::print_stdout)]
{
println!("{dist}");
Expand Down
6 changes: 1 addition & 5 deletions crates/puffin-cli/src/commands/pip_uninstall.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,7 @@ pub(crate) async fn pip_uninstall(

// Identify all editables that are installed.
for editable in &editables {
if let Some(distribution) = site_packages
.editables()
.find(|(_dist, url, _dir_info)| url == editable)
.map(|(dist, _url, _dir_info)| dist)
{
if let Some(distribution) = site_packages.get_editable(editable) {
distributions.push(distribution);
} else {
writeln!(
Expand Down
15 changes: 6 additions & 9 deletions crates/puffin-installer/src/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,10 @@ impl InstallPlan {
let mut seen =
FxHashMap::with_capacity_and_hasher(requirements.len(), BuildHasherDefault::default());

// TODO(charlie): This is quadratic. We should index the editable requirements by URL.
// Remove any editable requirements.
for editable in editable_requirements {
let editable_dist = site_packages
.editables()
.find(|(_dist, url, _dir_info)| url == &editable.url().raw())
.map(|(dist, _url, _dir_info)| dist.clone());
if let Some(dist) = editable_dist {
if site_packages.remove_editable(editable.raw()).is_some() {
debug!("Treating editable requirement as immutable: {editable}");
site_packages.remove(dist.name());
} else {
editables.push(editable.clone());
}
Expand Down Expand Up @@ -285,8 +280,10 @@ impl InstallPlan {
// If Puffin created the virtual environment, then remove all packages, regardless of
// whether they're considered "seed" packages.
let seed_packages = !venv.cfg().is_ok_and(|cfg| cfg.is_gourgeist());
for (package, dist_info) in site_packages {
if seed_packages && matches!(package.as_ref(), "pip" | "setuptools" | "wheel") {
for dist_info in site_packages {
if seed_packages
&& matches!(dist_info.name().as_ref(), "pip" | "setuptools" | "wheel")
{
debug!("Preserving seed package: {dist_info}");
continue;
}
Expand Down
143 changes: 104 additions & 39 deletions crates/puffin-installer/src/site_packages.rs
Original file line number Diff line number Diff line change
@@ -1,60 +1,96 @@
use std::collections::BTreeMap;
use std::hash::BuildHasherDefault;

use anyhow::{Context, Result};
use fs_err as fs;
use rustc_hash::FxHashSet;
use rustc_hash::{FxHashMap, FxHashSet};
use url::Url;

use distribution_types::{InstalledDirectUrlDist, InstalledDist, Metadata, VersionOrUrl};
use distribution_types::{InstalledDist, Metadata, VersionOrUrl};
use pep440_rs::{Version, VersionSpecifiers};
use pep508_rs::Requirement;
use puffin_interpreter::Virtualenv;
use puffin_normalize::PackageName;
use pypi_types::{DirInfo, DirectUrl};

/// An index over the packages installed in an environment.
///
/// Packages are indexed by both name and (for editable installs) URL.
#[derive(Debug)]
pub struct SitePackages<'a> {
venv: &'a Virtualenv,
index: BTreeMap<PackageName, InstalledDist>,
/// The vector of all installed distributions.
distributions: Vec<InstalledDist>,
/// The installed distributions, keyed by name.
by_name: FxHashMap<PackageName, usize>,
/// The installed editable distributions, keyed by URL.
by_url: FxHashMap<Url, usize>,
}

impl<'a> SitePackages<'a> {
/// Build an index of installed packages from the given Python executable.
pub fn from_executable(venv: &'a Virtualenv) -> Result<SitePackages<'a>> {
let mut index = BTreeMap::new();
let mut distributions: Vec<InstalledDist> = Vec::new();
let mut by_name = FxHashMap::default();
let mut by_url = FxHashMap::default();

// Index all installed packages by name.
for entry in fs::read_dir(venv.site_packages())? {
let entry = entry?;
if entry.file_type()?.is_dir() {
if let Some(dist_info) =
let Some(dist_info) =
InstalledDist::try_from_path(&entry.path()).with_context(|| {
format!("Failed to read metadata: from {}", entry.path().display())
})?
{
if let Some(existing) = index.insert(dist_info.name().clone(), dist_info) {
else {
continue;
};

let idx = distributions.len();

// Index the distribution by name.
if let Some(existing) = by_name.insert(dist_info.name().clone(), idx) {
let existing = &distributions[existing];
anyhow::bail!(
"Found duplicate package in environment: {} ({} vs. {})",
existing.name(),
existing.path().display(),
entry.path().display()
);
}

// Index the distribution by URL.
if let Some(url) = dist_info.editable() {
if let Some(existing) = by_url.insert(url.clone(), idx) {
let existing = &distributions[existing];
anyhow::bail!(
"Found duplicate package in environment: {} ({} vs. {})",
"Found duplicate editable in environment: {} ({} vs. {})",
existing.name(),
existing.path().display(),
entry.path().display()
);
}
}

// Add the distribution to the database.
distributions.push(dist_info);
}
}

Ok(Self { venv, index })
Ok(Self {
venv,
distributions,
by_name,
by_url,
})
}

/// Returns an iterator over the installed distributions.
pub fn distributions(&self) -> impl Iterator<Item = &InstalledDist> {
self.index.values()
pub fn iter(&self) -> impl Iterator<Item = &InstalledDist> {
self.distributions.iter()
}

/// Returns an iterator over the the installed distributions, represented as requirements.
pub fn requirements(&self) -> impl Iterator<Item = pep508_rs::Requirement> + '_ {
self.distributions().map(|dist| pep508_rs::Requirement {
pub fn requirements(&self) -> impl Iterator<Item = Requirement> + '_ {
self.iter().map(|dist| Requirement {
name: dist.name().clone(),
extras: None,
version_or_url: Some(match dist.version_or_url() {
Expand All @@ -69,45 +105,66 @@ impl<'a> SitePackages<'a> {
})
}

/// Returns an iterator over the installed editables.
pub fn editables(&self) -> impl Iterator<Item = (&InstalledDist, &Url, &DirInfo)> {
self.distributions().filter_map(|dist| {
// Editable installs are recorded through this type only
match dist {
InstalledDist::Url(InstalledDirectUrlDist {
url: DirectUrl::LocalDirectory { url, dir_info },
..
}) if dir_info.editable == Some(true) => Some((dist, url, dir_info)),
_ => None,
}
})
}

/// Returns the version of the given package, if it is installed.
pub fn get(&self, name: &PackageName) -> Option<&InstalledDist> {
self.index.get(name)
self.by_name.get(name).map(|idx| &self.distributions[*idx])
}

/// Remove the given package from the index, returning its version if it was installed.
pub fn remove(&mut self, name: &PackageName) -> Option<InstalledDist> {
self.index.remove(name)
let idx = self.by_name.get(name)?;
Some(self.swap_remove(*idx))
}

/// Returns the editable distribution installed from the given URL, if any.
pub fn get_editable(&self, url: &Url) -> Option<&InstalledDist> {
self.by_url.get(url).map(|idx| &self.distributions[*idx])
}

/// Remove the editable distribution installed from the given URL, if any.
pub fn remove_editable(&mut self, url: &Url) -> Option<InstalledDist> {
let idx = self.by_url.get(url)?;
Some(self.swap_remove(*idx))
}

/// Remove the distribution at the given index.
fn swap_remove(&mut self, idx: usize) -> InstalledDist {
// Remove from the existing index.
let dist = self.distributions.swap_remove(idx);

// If the distribution wasn't at the end, rewrite the entries for the moved distribution.
if idx < self.distributions.len() {
let moved = &self.distributions[idx];
if let Some(prev) = self.by_name.get_mut(moved.name()) {
*prev = idx;
}
if let Some(url) = moved.editable() {
if let Some(prev) = self.by_url.get_mut(url) {
*prev = idx;
}
}
}

dist
}

/// Returns `true` if there are no installed packages.
pub fn is_empty(&self) -> bool {
self.index.is_empty()
self.distributions.is_empty()
}

/// Returns the number of installed packages.
pub fn len(&self) -> usize {
self.index.len()
self.distributions.len()
}

/// Validate the installed packages in the virtual environment.
pub fn diagnostics(&self) -> Result<Vec<Diagnostic>> {
let mut diagnostics = Vec::new();

for (package, distribution) in &self.index {
for (package, index) in &self.by_name {
let distribution = &self.distributions[*index];

// Determine the dependencies for the given package.
let metadata = distribution.metadata()?;

Expand All @@ -128,7 +185,11 @@ impl<'a> SitePackages<'a> {
continue;
}

let Some(installed) = self.index.get(&requirement.name) else {
let Some(installed) = self
.by_name
.get(&requirement.name)
.map(|idx| &self.distributions[*idx])
else {
diagnostics.push(Diagnostic::MissingDependency {
package: package.clone(),
requirement: requirement.clone(),
Expand Down Expand Up @@ -171,7 +232,11 @@ impl<'a> SitePackages<'a> {
continue;
}

let Some(distribution) = self.index.get(&requirement.name) else {
let Some(distribution) = self
.by_name
.get(&requirement.name)
.map(|idx| &self.distributions[*idx])
else {
// The package isn't installed.
return Ok(false);
};
Expand Down Expand Up @@ -215,11 +280,11 @@ impl<'a> SitePackages<'a> {
}

impl IntoIterator for SitePackages<'_> {
type Item = (PackageName, InstalledDist);
type IntoIter = std::collections::btree_map::IntoIter<PackageName, InstalledDist>;
type Item = InstalledDist;
type IntoIter = std::vec::IntoIter<Self::Item>;

fn into_iter(self) -> Self::IntoIter {
self.index.into_iter()
self.distributions.into_iter()
}
}

Expand Down

0 comments on commit 00e1c33

Please sign in to comment.