Skip to content

Commit

Permalink
Add support for local directories with --index-url (#4226)
Browse files Browse the repository at this point in the history
## Summary

Closes #4078.
  • Loading branch information
charliermarsh authored Jun 11, 2024
1 parent f296ef0 commit 656fc42
Show file tree
Hide file tree
Showing 10 changed files with 219 additions and 31 deletions.
13 changes: 5 additions & 8 deletions crates/distribution-types/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ use std::fmt::{Display, Formatter};
use std::path::PathBuf;

use serde::{Deserialize, Serialize};
use thiserror::Error;
use url::Url;

use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
use pep508_rs::split_scheme;
use pypi_types::{CoreMetadata, HashDigest, Yanked};

/// Error converting [`pypi_types::File`] to [`distribution_types::File`].
#[derive(Debug, Error)]
#[derive(Debug, thiserror::Error)]
pub enum FileConversionError {
#[error("Failed to parse 'requires-python': `{0}`")]
RequiresPython(String, #[source] VersionSpecifiersParseError),
Expand Down Expand Up @@ -57,12 +56,10 @@ impl File {
.map_err(|err| FileConversionError::RequiresPython(err.line().clone(), err))?,
size: file.size,
upload_time_utc_ms: file.upload_time.map(|dt| dt.timestamp_millis()),
url: {
if split_scheme(&file.url).is_some() {
FileLocation::AbsoluteUrl(file.url)
} else {
FileLocation::RelativeUrl(base.to_string(), file.url)
}
url: if split_scheme(&file.url).is_some() {
FileLocation::AbsoluteUrl(file.url)
} else {
FileLocation::RelativeUrl(base.to_string(), file.url)
},
yanked: file.yanked,
})
Expand Down
2 changes: 2 additions & 0 deletions crates/distribution-types/src/index_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ impl FromStr for IndexUrl {
let url = VerbatimUrl::from_url(url).with_given(s.to_owned());
if *url.raw() == *PYPI_URL {
Ok(Self::Pypi(url))
} else if url.scheme() == "file" {
Ok(Self::Path(url))
} else {
Ok(Self::Url(url))
}
Expand Down
1 change: 1 addition & 0 deletions crates/pep508-rs/src/verbatim_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ impl Pep508Url for VerbatimUrl {
.with_given(url.to_string()),
)
}

// Ex) `https://download.pytorch.org/whl/torch_stable.html`
Some(_) => {
// Ex) `https://download.pytorch.org/whl/torch_stable.html`
Expand Down
10 changes: 5 additions & 5 deletions crates/uv-client/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,15 @@ impl From<ErrorKind> for Error {

#[derive(Debug, thiserror::Error)]
pub enum ErrorKind {
/// An invalid URL was provided.
#[error(transparent)]
UrlParseError(#[from] url::ParseError),
UrlParse(#[from] url::ParseError),

/// A base URL could not be joined with a possibly relative URL.
#[error(transparent)]
JoinRelativeError(#[from] pypi_types::JoinRelativeError),
JoinRelativeUrl(#[from] pypi_types::JoinRelativeError),

#[error("Expected a file URL, but received: {0}")]
NonFileUrl(Url),

/// Dist-info error
#[error(transparent)]
DistInfo(#[from] install_wheel_rs::Error),

Expand Down
109 changes: 92 additions & 17 deletions crates/uv-client/src/registry_client.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::collections::BTreeMap;
use std::fmt::Debug;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::str::FromStr;

use async_http_range_reader::AsyncHttpRangeReader;
Expand All @@ -20,7 +20,7 @@ use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
use platform_tags::Platform;
use pypi_types::{Metadata23, SimpleJson};
use uv_cache::{Cache, CacheBucket, WheelCache};
use uv_cache::{Cache, CacheBucket, CacheEntry, WheelCache};
use uv_configuration::IndexStrategy;
use uv_configuration::KeyringProviderType;
use uv_normalize::PackageName;
Expand Down Expand Up @@ -258,6 +258,10 @@ impl RegistryClient {
Ok(results)
}

/// Fetch the [`SimpleMetadata`] from a single index for a given package.
///
/// The index can either be a PEP 503-compatible remote repository, or a local directory laid
/// out in the same format.
async fn simple_single_index(
&self,
package_name: &PackageName,
Expand Down Expand Up @@ -293,6 +297,22 @@ impl RegistryClient {
Connectivity::Offline => CacheControl::AllowStale,
};

if matches!(index, IndexUrl::Path(_)) {
self.fetch_local_index(package_name, &url).await.map(Ok)
} else {
self.fetch_remote_index(package_name, &url, &cache_entry, cache_control)
.await
}
}

/// Fetch the [`SimpleMetadata`] from a remote URL, using the PEP 503 Simple Repository API.
async fn fetch_remote_index(
&self,
package_name: &PackageName,
url: &Url,
cache_entry: &CacheEntry,
cache_control: CacheControl,
) -> Result<Result<OwnedArchive<SimpleMetadata>, CachedClientError<Error>>, Error> {
let simple_request = self
.uncached_client()
.get(url.clone())
Expand Down Expand Up @@ -331,10 +351,7 @@ impl RegistryClient {
}
MediaType::Html => {
let text = response.text().await.map_err(ErrorKind::from)?;
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
.map_err(|err| Error::from_html_err(err, url.clone()))?;

SimpleMetadata::from_files(files, package_name, base.as_url())
SimpleMetadata::from_html(&text, package_name, &url)?
}
};
OwnedArchive::from_unarchived(&unarchived)
Expand All @@ -346,14 +363,32 @@ impl RegistryClient {
.cached_client()
.get_cacheable(
simple_request,
&cache_entry,
cache_entry,
cache_control,
parse_simple_response,
)
.await;
Ok(result)
}

/// Load the [`SimpleMetadata`] for a package from an index stored on the local filesystem.
///
/// The `url` is converted to a filesystem path, and the `index.html` file inside that
/// directory is read and parsed as a PEP 503-style HTML page.
///
/// # Errors
///
/// Returns [`ErrorKind::NonFileUrl`] if `url` cannot be converted to a file path, and
/// propagates any failure to read `index.html`, parse the HTML, or archive the result.
async fn fetch_local_index(
    &self,
    package_name: &PackageName,
    url: &Url,
) -> Result<OwnedArchive<SimpleMetadata>, Error> {
    // Resolve the URL to a directory on disk; reject anything that isn't a usable file URL.
    let Ok(directory) = url.to_file_path() else {
        return Err(ErrorKind::NonFileUrl(url.clone()).into());
    };
    let index_path = directory.join("index.html");

    // Read the whole index page into memory before parsing it.
    let html = fs_err::tokio::read_to_string(&index_path)
        .await
        .map_err(ErrorKind::from)?;

    // The original URL (not the `index.html` path) is handed to the HTML parser.
    let parsed = SimpleMetadata::from_html(&html, package_name, url)?;
    OwnedArchive::from_unarchived(&parsed)
}

/// Fetch the metadata for a remote wheel file.
///
/// For a remote wheel, we try the following ways to fetch the metadata:
Expand All @@ -364,27 +399,56 @@ impl RegistryClient {
pub async fn wheel_metadata(&self, built_dist: &BuiltDist) -> Result<Metadata23, Error> {
let metadata = match &built_dist {
BuiltDist::Registry(wheels) => {
/// Where the selected registry wheel lives, once its [`FileLocation`] has been resolved.
///
/// `file://` URLs and explicit paths both end up as [`WheelLocation::Path`], so that the
/// metadata can be read straight from disk; everything else stays a URL.
#[derive(Debug, Clone)]
enum WheelLocation {
/// A local file path; the wheel is opened from disk to read its metadata.
Path(PathBuf),
/// A remote URL; the metadata is fetched through the registry client.
Url(Url),
}

let wheel = wheels.best_wheel();
match &wheel.file.url {

let location = match &wheel.file.url {
FileLocation::RelativeUrl(base, url) => {
let url = pypi_types::base_url_join_relative(base, url)
.map_err(ErrorKind::JoinRelativeError)?;
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url)
.await?
.map_err(ErrorKind::JoinRelativeUrl)?;
if url.scheme() == "file" {
let path = url
.to_file_path()
.map_err(|_| ErrorKind::NonFileUrl(url.clone()))?;
WheelLocation::Path(path)
} else {
WheelLocation::Url(url)
}
}
FileLocation::AbsoluteUrl(url) => {
let url = Url::parse(url).map_err(ErrorKind::UrlParseError)?;
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url)
.await?
let url = Url::parse(url).map_err(ErrorKind::UrlParse)?;
if url.scheme() == "file" {
let path = url
.to_file_path()
.map_err(|_| ErrorKind::NonFileUrl(url.clone()))?;
WheelLocation::Path(path)
} else {
WheelLocation::Url(url)
}
}
FileLocation::Path(path) => {
FileLocation::Path(path) => WheelLocation::Path(path.clone()),
};

match location {
WheelLocation::Path(path) => {
let file = fs_err::tokio::File::open(&path)
.await
.map_err(ErrorKind::Io)?;
let reader = tokio::io::BufReader::new(file);
read_metadata_async_seek(&wheel.filename, built_dist.to_string(), reader)
.await?
}
WheelLocation::Url(url) => {
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url)
.await?
}
}
}
BuiltDist::DirectUrl(wheel) => {
Expand Down Expand Up @@ -599,7 +663,7 @@ impl RegistryClient {
std::io::Error::new(
std::io::ErrorKind::TimedOut,
format!(
"Failed to download distribution due to network timeout. Try increasing UV_HTTP_TIMEOUT (current value: {}s).", self.timeout()
"Failed to download distribution due to network timeout. Try increasing UV_HTTP_TIMEOUT (current value: {}s).", self.timeout()
),
)
} else {
Expand Down Expand Up @@ -772,7 +836,6 @@ impl SimpleMetadata {
DistFilename::SourceDistFilename(ref inner) => &inner.version,
DistFilename::WheelFilename(ref inner) => &inner.version,
};

let file = match File::try_from(file, base) {
Ok(file) => file,
Err(err) => {
Expand All @@ -799,6 +862,18 @@ impl SimpleMetadata {
.collect(),
)
}

/// Read the [`SimpleMetadata`] from an HTML index.
fn from_html(text: &str, package_name: &PackageName, url: &Url) -> Result<Self, Error> {
let SimpleHtml { base, files } =
SimpleHtml::parse(text, url).map_err(|err| Error::from_html_err(err, url.clone()))?;

Ok(SimpleMetadata::from_files(
files,
package_name,
base.as_url(),
))
}
}

impl IntoIterator for SimpleMetadata {
Expand Down
11 changes: 10 additions & 1 deletion crates/uv-distribution/src/distribution_database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,6 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
WheelCache::Index(&wheel.index).wheel_dir(wheel.name().as_ref()),
wheel.filename.stem(),
);

return self
.load_wheel(path, &wheel.filename, cache_entry, dist, hashes)
.await;
Expand All @@ -185,6 +184,16 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
wheel.filename.stem(),
);

// If the URL is a file URL, load the wheel directly.
if url.scheme() == "file" {
let path = url
.to_file_path()
.map_err(|()| Error::NonFileUrl(url.clone()))?;
return self
.load_wheel(&path, &wheel.filename, wheel_entry, dist, hashes)
.await;
}

// Download and unzip.
match self
.stream_wheel(
Expand Down
3 changes: 3 additions & 0 deletions crates/uv-distribution/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::path::PathBuf;

use tokio::task::JoinError;
use url::Url;
use zip::result::ZipError;

use crate::metadata::MetadataError;
Expand All @@ -25,6 +26,8 @@ pub enum Error {
RelativePath(PathBuf),
#[error(transparent)]
JoinRelativeUrl(#[from] pypi_types::JoinRelativeError),
#[error("Expected a file URL, but received: {0}")]
NonFileUrl(Url),
#[error(transparent)]
Git(#[from] uv_git::GitResolverError),
#[error(transparent)]
Expand Down
39 changes: 39 additions & 0 deletions crates/uv-distribution/src/source/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,26 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
}
};

// If the URL is a file URL, use the local path directly.
if url.scheme() == "file" {
let path = url
.to_file_path()
.map_err(|()| Error::NonFileUrl(url.clone()))?;
return self
.archive(
source,
&PathSourceUrl {
url: &url,
path: Cow::Owned(path),
},
&cache_shard,
tags,
hashes,
)
.boxed_local()
.await;
}

self.url(
source,
&dist.file.filename,
Expand Down Expand Up @@ -281,6 +301,25 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
}
};

// If the URL is a file URL, use the local path directly.
if url.scheme() == "file" {
let path = url
.to_file_path()
.map_err(|()| Error::NonFileUrl(url.clone()))?;
return self
.archive_metadata(
source,
&PathSourceUrl {
url: &url,
path: Cow::Owned(path),
},
&cache_shard,
hashes,
)
.boxed_local()
.await;
}

self.url_metadata(
source,
&dist.file.filename,
Expand Down
Loading

0 comments on commit 656fc42

Please sign in to comment.