From f7a635ba413be6ef4e095d395c3a6450122530c0 Mon Sep 17 00:00:00 2001 From: messense Date: Fri, 4 Jun 2021 00:09:55 +0800 Subject: [PATCH] Replace `read_distribution` mod with `python-pkginfo` crate --- Cargo.lock | 46 +++++-- Cargo.toml | 1 + src/lib.rs | 4 - src/main.rs | 72 ++--------- src/read_distribution.rs | 254 --------------------------------------- src/upload.rs | 100 ++++++++++----- 6 files changed, 114 insertions(+), 363 deletions(-) delete mode 100644 src/read_distribution.rs diff --git a/Cargo.lock b/Cargo.lock index e99781972..7b97d4a51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -95,9 +95,9 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" [[package]] name = "backtrace" -version = "0.3.59" +version = "0.3.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4717cfcbfaa661a0fd48f8453951837ae7e8f81e481fbb136e3202d72805a744" +checksum = "b7815ea54e4d821e791162e078acbebfd6d8c8939cd559c9335dceb1c8ca7282" dependencies = [ "addr2line", "cc", @@ -188,6 +188,16 @@ dependencies = [ "libc", ] +[[package]] +name = "bzip2" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abf8012c8a15d5df745fcf258d93e6149dcf102882c8d8702d9cff778eab43a8" +dependencies = [ + "bzip2-sys", + "libc", +] + [[package]] name = "bzip2-sys" version = "0.1.10+1.0.8" @@ -651,9 +661,9 @@ checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" [[package]] name = "heck" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" dependencies = [ "unicode-segmentation", ] @@ -924,6 +934,7 @@ dependencies = [ "platform-info", "pretty_env_logger", "pyproject-toml", + "python-pkginfo", "regex", "reqwest", "rpassword", @@ -1116,9 +1127,12 @@ dependencies = [ [[package]] name = "object" -version = "0.24.0" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a5b3dd1c072ee7963717671d1ca129f1048fda25edea6b752bfc71ac8854170" +checksum = "9023c1c0973b327f073c7f2fceb9bcc049862f93a7d14c6feb46c8a56460a0d5" +dependencies = [ + "memchr", +] [[package]] name = "once_cell" @@ -1290,6 +1304,20 @@ dependencies = [ "toml", ] +[[package]] +name = "python-pkginfo" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b7b271593f2480097ac68389efe5daf86032f47fd13d9acabad8447b9fffc2a" +dependencies = [ + "bzip2 0.4.2", + "flate2", + "fs-err", + "mailparse", + "tar", + "zip", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -1969,9 +1997,9 @@ dependencies = [ [[package]] name = "unicode-normalization" -version = "0.1.18" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33717dca7ac877f497014e10d73f3acf948c342bee31b5ca7892faf94ccc6b49" +checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9" dependencies = [ "tinyvec", ] @@ -2240,7 +2268,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c83dc9b784d252127720168abd71ea82bf8c3d96b17dc565b5e2a02854f2b27" dependencies = [ "byteorder", - "bzip2", + "bzip2 0.3.3", "crc32fast", "flate2", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index 133568e35..b725d7a08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,6 +58,7 @@ once_cell = "1.7.2" scroll = "0.10.2" target-lexicon = "0.12.0" pyproject-toml = "0.1.0" +python-pkginfo = "0.3.2" [dev-dependencies] indoc = "1.0.3" diff --git a/src/lib.rs b/src/lib.rs index 5ac67ade4..aa5e7bc75 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,9 +38,6 @@ pub use crate::module_writer::{ }; pub use crate::pyproject_toml::PyProjectToml; pub use crate::python_interpreter::PythonInterpreter; -pub use crate::read_distribution::{ - get_metadata_for_distribution, get_supported_version_for_distribution, -}; pub use crate::target::Target; pub use auditwheel::PlatformTag; pub use source_distribution::source_distribution; @@ -61,7 +58,6 @@ mod metadata; mod module_writer; mod pyproject_toml; mod python_interpreter; -mod read_distribution; #[cfg(feature = "upload")] mod registry; mod source_distribution; diff --git a/src/main.rs b/src/main.rs index dffbc8d91..d1dc146fd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,54 +23,12 @@ use std::path::PathBuf; use structopt::StructOpt; #[cfg(feature = "upload")] use { - maturin::{ - get_metadata_for_distribution, get_supported_version_for_distribution, upload, - BuiltWheelMetadata, Registry, UploadError, - }, + maturin::{upload, Registry, UploadError}, reqwest::Url, rpassword, std::io, }; -#[cfg(feature = "upload")] -/// Upload item descriptor used by `upload_ui()` -struct UploadItem { - /// Built wheel file path - wheel_path: PathBuf, - /// Supported Python versions tag (e.g. "cp39") - supported_versions: String, - /// Wheel metadata in the (key, value) format - metadata: Vec<(String, String)>, -} - -#[cfg(feature = "upload")] -impl UploadItem { - /// Creates a new upload item descriptor from the built wheel and its metadata. - fn from_built_wheel(wheel: BuiltWheelMetadata, metadata: Vec<(String, String)>) -> Self { - let (wheel_path, supported_versions) = wheel; - - UploadItem { - wheel_path, - supported_versions, - metadata, - } - } - - /// Attempts to create a new upload item descriptor from the third-party wheel file path. - /// - /// Fails with the wheel metadata extraction errors. - fn try_from_wheel_path(wheel_path: PathBuf) -> Result { - let supported_versions = get_supported_version_for_distribution(&wheel_path)?; - let metadata = get_metadata_for_distribution(&wheel_path)?; - - Ok(UploadItem { - wheel_path, - supported_versions, - metadata, - }) - } -} - /// Returns the password and a bool that states whether to ask for re-entering the password /// after a failed authentication /// @@ -452,13 +410,13 @@ fn pep517(subcommand: Pep517Command) -> Result<()> { /// Handles authentication/keyring integration and retrying of the publish subcommand #[cfg(feature = "upload")] -fn upload_ui(items: &[UploadItem], publish: &PublishOpt) -> Result<()> { +fn upload_ui(items: &[PathBuf], publish: &PublishOpt) -> Result<()> { let registry = complete_registry(&publish)?; println!("🚀 Uploading {} packages", items.len()); for i in items { - let upload_result = upload(®istry, &i.wheel_path, &i.metadata, &i.supported_versions); + let upload_result = upload(®istry, &i); match upload_result { Ok(()) => (), @@ -482,10 +440,7 @@ fn upload_ui(items: &[UploadItem], publish: &PublishOpt) -> Result<()> { bail!("Username and/or password are wrong"); } Err(err) => { - let filename = i - .wheel_path - .file_name() - .unwrap_or_else(|| i.wheel_path.as_os_str()); + let filename = i.file_name().unwrap_or_else(|| i.as_os_str()); if let UploadError::FileExistsError(_) = err { if publish.skip_existing { eprintln!( @@ -495,11 +450,9 @@ fn upload_ui(items: &[UploadItem], publish: &PublishOpt) -> Result<()> { continue; } } - let filesize = fs::metadata(&i.wheel_path) + let filesize = fs::metadata(&i) .map(|x| ByteSize(x.len()).to_string()) - .unwrap_or_else(|e| { - format!("Failed to get the filesize of {:?}: {}", &i.wheel_path, e) - }); + .unwrap_or_else(|e| format!("Failed to get the filesize of {:?}: {}", &i, e)); return Err(err) .context(format!("💥 Failed to upload {:?} ({})", filename, filesize)); } @@ -559,7 +512,6 @@ fn run() -> Result<()> { eprintln!("âš  Warning: You're publishing debug wheels"); } - let metadata21 = build_context.metadata21.to_vec(); let mut wheels = build_context.build_wheels()?; if !no_sdist { if let Some(sd) = build_context.build_source_distribution()? { @@ -567,10 +519,7 @@ fn run() -> Result<()> { } } - let items = wheels - .into_iter() - .map(|wheel| UploadItem::from_built_wheel(wheel, metadata21.clone())) - .collect::>(); + let items = wheels.into_iter().map(|wheel| wheel.0).collect::>(); upload_ui(&items, &publish)? } @@ -658,12 +607,7 @@ fn run() -> Result<()> { return Ok(()); } - let items = files - .into_iter() - .map(UploadItem::try_from_wheel_path) - .collect::>>()?; - - upload_ui(&items, &publish)? + upload_ui(&files, &publish)? } } diff --git a/src/read_distribution.rs b/src/read_distribution.rs deleted file mode 100644 index d73a32211..000000000 --- a/src/read_distribution.rs +++ /dev/null @@ -1,254 +0,0 @@ -use anyhow::{bail, Context, Result}; -use flate2::read::GzDecoder; -use fs_err::File; -use mailparse::parse_mail; -use regex::Regex; -use std::io::{BufReader, Read}; -use std::path::{Path, PathBuf}; -use zip::ZipArchive; - -fn filename_from_file(path: impl AsRef) -> Result { - Ok(path - .as_ref() - .file_name() - .context("Missing filename")? - .to_str() - .context("Expected a utf-8 filename")? - .to_string()) -} - -/// Standard Python wheel filename components (tags) -/// -/// The wheel filename is "----.whl" -struct WheelFilenameParts { - name: String, - version: String, - python_tag: String, - #[allow(dead_code)] - abi_tag: String, - #[allow(dead_code)] - platform_tag: String, -} - -/// Parses the wheel filename into its components -/// -/// The wheel filename _must_ end with ".whl" -fn parse_wheel_filename(fname: &str) -> Result { - let split: Vec<_> = fname.strip_suffix(".whl").unwrap().split('-').collect(); - - let parts = match split.as_slice() { - [name, version, python_tag, abi_tag, platform_tag] => WheelFilenameParts { - name: name.to_string(), - version: version.to_string(), - python_tag: python_tag.to_string(), - abi_tag: abi_tag.to_string(), - platform_tag: platform_tag.to_string(), - }, - _ => bail!("The wheel filename is invalid: {}", fname), - }; - - Ok(parts) -} - -/// Read the email format into key value pairs -fn metadata_from_bytes(metadata_email: &mut Vec) -> Result> { - let metadata_email = parse_mail(&metadata_email).context("Failed to parse METADATA")?; - - let mut metadata = Vec::new(); - for header in &metadata_email.headers { - metadata.push((header.get_key().to_string(), header.get_value().to_string())); - } - - let body = metadata_email - .get_body() - .context("Failed to parse METADATA")?; - if !body.trim().is_empty() { - metadata.push(("Description".into(), body)); - } - Ok(metadata) -} - -/// Port of pip's `canonicalize_name` -/// https://github.com/pypa/pip/blob/b33e791742570215f15663410c3ed987d2253d5b/src/pip/_vendor/packaging/utils.py#L18-L25 -fn canonicalize_name(name: &str) -> String { - Regex::new("[-_.]+") - .unwrap() - .replace(name, "-") - .to_lowercase() -} - -/// Reads the METADATA file in the .dist-info directory of a wheel, returning -/// the metadata (https://packaging.python.org/specifications/core-metadata/) -/// as key value pairs -fn read_metadata_for_wheel(path: impl AsRef) -> Result> { - let filename = filename_from_file(path.as_ref())?; - let parts = parse_wheel_filename(&filename)?; - - let reader = BufReader::new(File::open(path.as_ref())?); - let mut archive = ZipArchive::new(reader).context("Failed to read file as zip")?; - - // The METADATA format is an email (RFC 822) - // pip's implementation: https://github.com/pypa/pip/blob/b33e791742570215f15663410c3ed987d2253d5b/src/pip/_internal/utils/wheel.py#L109-L144 - // twine's implementation: https://github.com/pypa/twine/blob/534385596820129b41cbcdcc83d34aa8788067f1/twine/wheel.py#L52-L56 - // We mostly follow pip - let mut metadata_email = Vec::new(); - - // Find the metadata file - let name = format!("{}-{}.dist-info/METADATA", parts.name, parts.version); - let metadata_files: Vec<_> = archive - .file_names() - .filter(|i| canonicalize_name(i) == canonicalize_name(&name)) - .map(ToString::to_string) - .collect(); - - match &metadata_files.as_slice() { - [] => bail!( - "This wheel does not contain a METADATA matching {}, which is mandatory for wheels", - name - ), - [metadata_file] => archive - .by_name(&metadata_file) - .context(format!("Failed to read METADATA file {}", metadata_file))? - .read_to_end(&mut metadata_email) - .context(format!("Failed to read METADATA file {}", metadata_file))?, - files => bail!( - "Found more than one metadata file matching {}: {:?}", - name, - files - ), - }; - - metadata_from_bytes(&mut metadata_email) -} - -/// Returns the metadata for a source distribution (.tar.gz). -/// Only parses the filename since dist-info is not part of source -/// distributions -fn read_metadata_for_source_distribution(path: impl AsRef) -> Result> { - // "dist/foo_ext-1.0.1.tar.gz" -> "foo_ext-1.0.1/PKG-INFO" - let mut pkginfo: PathBuf = path.as_ref().file_name().unwrap().into(); - pkginfo.set_extension(""); - pkginfo.set_extension(""); - pkginfo.push("PKG-INFO"); - - let mut reader = tar::Archive::new(GzDecoder::new(BufReader::new(File::open(path.as_ref())?))); - // Unlike for wheels, in source distributions the metadata is stored in a file called PKG-INFO - // try_find would be ideal here, but it's nightly only - let mut entry = reader - .entries()? - .map(|entry| -> Result<_> { - let entry = entry?; - if entry.path()? == pkginfo { - Ok(Some(entry)) - } else { - Ok(None) - } - }) - .find_map(|x| x.transpose()) - .context(format!( - "Source distribution {:?} does not contain a PKG-INFO, but it should", - path.as_ref() - ))? - .context(format!("Failed to read {:?}", path.as_ref()))?; - let mut metadata_email = Vec::new(); - entry - .read_to_end(&mut metadata_email) - .context(format!("Failed to read {:?}", path.as_ref()))?; - metadata_from_bytes(&mut metadata_email) -} - -/// Returns the metadata as key value pairs for a wheel or a source distribution -pub fn get_metadata_for_distribution(path: &Path) -> Result> { - let filename = filename_from_file(path)?; - if filename.ends_with(".whl") { - read_metadata_for_wheel(path) - .context(format!("Failed to read metadata from wheel at {:?}", path)) - } else if filename.ends_with(".tar.gz") { - read_metadata_for_source_distribution(path).context(format!( - "Failed to read metadata from source distribution at {:?}", - path - )) - } else { - bail!("File has an unknown extension: {:?}", path) - } -} - -/// Returns the supported Python interpreter version tag for a wheel or a source distribution -/// -/// The version tag is encoded in the wheel file name and usually looks like "py3" or "cp37". -/// For the source distributions the version tag is always "source". -pub fn get_supported_version_for_distribution(path: &Path) -> Result { - let filename = filename_from_file(path)?; - - let python_tag = if filename.ends_with(".whl") { - parse_wheel_filename(&filename)?.python_tag - } else if filename.ends_with(".tar.gz") { - "source".to_string() - } else { - bail!("File has an unknown extension: {:?}", path) - }; - - Ok(python_tag) -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_source_distribution() { - let metadata = - get_metadata_for_distribution(Path::new("test-data/pyo3_mixed-2.1.1.tar.gz")).unwrap(); - let expected: Vec<_> = [ - ("Metadata-Version", "2.1"), - ("Name", "pyo3-mixed"), - ("Version", "2.1.1"), - ("Summary", "Implements a dummy function combining rust and python"), - ("Author", "konstin "), - ("Author-Email", "konstin "), - ("Description-Content-Type", "text/markdown; charset=UTF-8; variant=GFM"), - ("Description", "# pyo3-mixed\n\nA package for testing maturin with a mixed pyo3/python project.\n\n"), - ].iter().map(|(k,v)| (k.to_string(), v.to_string())).collect(); - - assert_eq!(metadata, expected); - } - - #[test] - fn test_wheel() { - let metadata = get_metadata_for_distribution(Path::new( - "test-data/pyo3_mixed-2.1.1-cp38-cp38-manylinux1_x86_64.whl", - )) - .unwrap(); - assert_eq!( - metadata.iter().map(|x| &x.0).collect::>(), - vec![ - "Metadata-Version", - "Name", - "Version", - "Summary", - "Author", - "Author-Email", - "Description-Content-Type", - "Description" - ] - ); - // Check the description - assert!(metadata[7].1.starts_with("# pyo3-mixed")); - assert!(metadata[7].1.ends_with("tox.ini\n\n")); - } - - #[test] - fn test_supported_version() { - let path = Path::new("test-data/pyo3_mixed-2.1.1.tar.gz"); - let supported_version = get_supported_version_for_distribution(path).unwrap(); - assert_eq!(supported_version, "source"); - - let path = Path::new("test-data/pyo3_mixed-2.1.1-cp38-cp38-manylinux1_x86_64.whl"); - let supported_version = get_supported_version_for_distribution(path).unwrap(); - assert_eq!(supported_version, "cp38"); - - let path = Path::new("test_data/pyo3_stubs-2.1.1-py3-none-any.whl"); - let supported_version = get_supported_version_for_distribution(path).unwrap(); - assert_eq!(supported_version, "py3"); - } -} diff --git a/src/upload.rs b/src/upload.rs index 1cd5b99f2..ce5a28e72 100644 --- a/src/upload.rs +++ b/src/upload.rs @@ -3,6 +3,7 @@ use crate::Registry; use fs_err::File; +use regex::Regex; use reqwest::{self, blocking::multipart::Form, blocking::Client, StatusCode}; use sha2::{Digest, Sha256}; use std::io; @@ -32,6 +33,9 @@ pub enum UploadError { /// File already exists #[error("File already exists: {0}")] FileExistsError(String), + /// Read package metadata error + #[error("Read package metadata error")] + PkgInfoError(#[source] python_pkginfo::Error), } impl From for UploadError { @@ -46,51 +50,83 @@ impl From for UploadError { } } +impl From for UploadError { + fn from(error: python_pkginfo::Error) -> Self { + UploadError::PkgInfoError(error) + } +} + +/// Port of pip's `canonicalize_name` +/// https://github.com/pypa/pip/blob/b33e791742570215f15663410c3ed987d2253d5b/src/pip/_vendor/packaging/utils.py#L18-L25 +fn canonicalize_name(name: &str) -> String { + Regex::new("[-_.]+") + .unwrap() + .replace(name, "-") + .to_lowercase() +} + /// Uploads a single wheel to the registry -pub fn upload( - registry: &Registry, - wheel_path: &Path, - metadata21: &[(String, String)], - supported_version: &str, -) -> Result<(), UploadError> { +pub fn upload(registry: &Registry, wheel_path: &Path) -> Result<(), UploadError> { let mut wheel = File::open(&wheel_path)?; let mut hasher = Sha256::new(); io::copy(&mut wheel, &mut hasher)?; let hash_hex = format!("{:x}", hasher.finalize()); + let dist = python_pkginfo::Distribution::new(wheel_path)?; + let metadata = dist.metadata(); + let mut api_metadata = vec![ - (":action".to_string(), "file_upload".to_string()), - ("sha256_digest".to_string(), hash_hex), - ("protocol_version".to_string(), "1".to_string()), + (":action", "file_upload".to_string()), + ("sha256_digest", hash_hex), + ("protocol_version", "1".to_string()), + ("metadata_version", metadata.metadata_version.clone()), + ("name", canonicalize_name(&metadata.name)), + ("version", metadata.version.clone()), + ("pyversion", dist.python_version().to_string()), + ("filetype", dist.r#type().to_string()), ]; - api_metadata.push(("pyversion".to_string(), supported_version.to_string())); + let mut add_option = |name, value: &Option| { + if let Some(some) = value.clone() { + api_metadata.push((name, some)); + } + }; - if supported_version != "source" { - api_metadata.push(("filetype".to_string(), "bdist_wheel".to_string())); - } else { - api_metadata.push(("filetype".to_string(), "sdist".to_string())); - } + // https://github.com/pypa/warehouse/blob/75061540e6ab5aae3f8758b569e926b6355abea8/warehouse/forklift/legacy.py#L424 + add_option("summary", &metadata.summary); + add_option("description", &metadata.description); + add_option( + "description_content_type", + &metadata.description_content_type, + ); + add_option("author", &metadata.author); + add_option("author_email", &metadata.author_email); + add_option("maintainer", &metadata.maintainer); + add_option("maintainer_email", &metadata.maintainer_email); + add_option("license", &metadata.license); + add_option("keywords", &metadata.keywords); + add_option("home_page", &metadata.home_page); + add_option("download_url", &metadata.download_url); + add_option("requires_path", &metadata.requires_python); + add_option("summary", &metadata.summary); + + let mut add_vec = |name, values: &[String]| { + for i in values { + api_metadata.push((name, i.clone())); + } + }; - let joined_metadata: Vec<(String, String)> = api_metadata - .into_iter() - // Type system shenanigans - .chain(metadata21.to_vec().into_iter()) - // All fields must be lower case and with underscores or they will be ignored by warehouse - .map(|(key, value)| { - let mut key = key.to_lowercase().replace("-", "_"); - if key == "classifier" { - // PyPI upload api expects `classifiers` instead of `classifier` - // See https://github.com/pypa/warehouse/issues/3151#issuecomment-796965735 - key = "classifiers".to_string(); - } - (key, value) - }) - .collect(); + add_vec("classifiers", &metadata.classifiers); + add_vec("platform", &metadata.platforms); + add_vec("requires_dist", &metadata.requires_dist); + add_vec("provides_dist", &metadata.provides_dist); + add_vec("obsoletes_dist", &metadata.obsoletes_dist); + add_vec("requires_external", &metadata.requires_external); + add_vec("project_urls", &metadata.project_urls); let mut form = Form::new(); - for (key, value) in joined_metadata { - form = form.text(key, value.to_owned()) + for (key, value) in api_metadata { + form = form.text(key, value); } form = form.file("content", &wheel_path)?;