diff --git a/Cargo.lock b/Cargo.lock
index d5f42c18e6cf..16754df989fa 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1840,6 +1840,7 @@ dependencies = [
  "distribution-filename",
  "fs-err",
  "indoc",
+ "mailparse",
  "pathdiff",
  "pep440_rs",
  "platform-info",
diff --git a/crates/install-wheel-rs/Cargo.toml b/crates/install-wheel-rs/Cargo.toml
index eec879177f6e..7ca22b8ed5c6 100644
--- a/crates/install-wheel-rs/Cargo.toml
+++ b/crates/install-wheel-rs/Cargo.toml
@@ -33,6 +33,7 @@ configparser = { workspace = true }
 csv = { workspace = true }
 data-encoding = { workspace = true }
 fs-err = { workspace = true }
+mailparse = { workspace = true }
 pathdiff = { workspace = true }
 platform-info = { workspace = true }
 reflink-copy = { workspace = true }
diff --git a/crates/install-wheel-rs/src/wheel.rs b/crates/install-wheel-rs/src/wheel.rs
index 3698fca56cb5..f2f46d118145 100644
--- a/crates/install-wheel-rs/src/wheel.rs
+++ b/crates/install-wheel-rs/src/wheel.rs
@@ -1,11 +1,12 @@
 use std::collections::HashMap;
-use std::io::{BufRead, BufReader, Cursor, Read, Seek, Write};
+use std::io::{BufReader, Cursor, Read, Seek, Write};
 use std::path::{Path, PathBuf};
 use std::{env, io};
 
 use data_encoding::BASE64URL_NOPAD;
 use fs_err as fs;
 use fs_err::{DirEntry, File};
+use mailparse::parse_headers;
 use rustc_hash::FxHashMap;
 use sha2::{Digest, Sha256};
 use tracing::{instrument, warn};
@@ -353,10 +354,10 @@ pub enum LibKind {
 /// Parse WHEEL file.
 ///
 /// > {distribution}-{version}.dist-info/WHEEL is metadata about the archive itself in the same
-/// > basic key: value format:
+/// > email message format:
 pub fn parse_wheel_file(wheel_text: &str) -> Result<LibKind, Error> {
-    // {distribution}-{version}.dist-info/WHEEL is metadata about the archive itself in the same basic key: value format:
-    let data = parse_key_value_file(&mut wheel_text.as_bytes(), "WHEEL")?;
+    // {distribution}-{version}.dist-info/WHEEL is metadata about the archive itself in the same email message format:
+    let data = parse_email_message_file(&mut wheel_text.as_bytes(), "WHEEL")?;
 
     // Determine whether Root-Is-Purelib == ‘true’.
     // If it is, the wheel is pure, and should be installed into purelib.
@@ -797,29 +798,49 @@ pub fn read_record_file(record: &mut impl Read) -> Result<Vec<RecordEntry>, Erro
         .collect()
 }
 
-/// Parse a file with `Key: value` entries such as WHEEL and METADATA
-fn parse_key_value_file(
+/// Parse a file in email message header format, such as WHEEL and METADATA.
+///
+/// The whole of `file` is read and handed to [`parse_headers`]. Values are grouped by header
+/// name, so a name that appears several times keeps every one of its values, in file order.
+/// Names and values are stored with surrounding whitespace removed.
+///
+/// # Errors
+///
+/// Propagates the read error if `file` cannot be read, and returns [`Error::InvalidWheel`]
+/// (mentioning `debug_filename`) if the headers cannot be parsed.
+fn parse_email_message_file(
     file: impl Read,
     debug_filename: &str,
 ) -> Result<FxHashMap<String, Vec<String>>, Error> {
     let mut data: FxHashMap<String, Vec<String>> = FxHashMap::default();
 
     let file = BufReader::new(file);
-    for (line_no, line) in file.lines().enumerate() {
-        let line = line?.trim().to_string();
-        if line.is_empty() {
-            continue;
+    let content = file.bytes().collect::<Result<Vec<u8>, _>>()?;
+
+    let headers = parse_headers(content.as_slice())
+        .map_err(|err| {
+            Error::InvalidWheel(format!("Failed to parse {debug_filename} file: {err}"))
+        })?
+        .0;
+
+    for header in headers {
+        let mut name = header.get_key();
+        let mut value = header.get_value();
+
+        // Re-allocate only when trimming actually removes something; names and values that are
+        // already free of surrounding whitespace are moved into the map as-is.
+        let trimmed_name = name.trim();
+        if name != trimmed_name {
+            name = trimmed_name.to_string();
         }
-        let (key, value) = line.split_once(':').ok_or_else(|| {
-            Error::InvalidWheel(format!(
-                "Line {} of the {debug_filename} file is invalid",
-                line_no + 1
-            ))
-        })?;
-        data.entry(key.trim().to_string())
-            .or_default()
-            .push(value.trim().to_string());
+        let trimmed_value = value.trim();
+        if value != trimmed_value {
+            value = trimmed_value.to_string();
+        }
+
+        data.entry(name).or_default().push(value);
     }
+
     Ok(data)
 }
 
@@ -836,11 +857,11 @@ mod test {
     use crate::Error;
 
     use super::{
-        get_script_executable, parse_key_value_file, parse_wheel_file, read_record_file, Script,
+        get_script_executable, parse_email_message_file, parse_wheel_file, read_record_file, Script,
     };
 
     #[test]
-    fn test_parse_key_value_file() {
+    fn test_parse_email_message_file() {
         let text = indoc! {"
             Wheel-Version: 1.0
             Generator: bdist_wheel (0.37.1)
@@ -849,7 +870,21 @@ mod test {
             Tag: cp38-cp38-manylinux2014_x86_64
         "};
 
-        parse_key_value_file(&mut text.as_bytes(), "WHEEL").unwrap();
+        parse_email_message_file(&mut text.as_bytes(), "WHEEL").unwrap();
+    }
+
+    #[test]
+    fn test_parse_email_message_file_with_value_starting_with_linesep_and_two_space() {
+        // Check: https://files.pythonhosted.org/packages/0c/b7/ecfdce6368cc3664d301f7f52db4fe1004aa7da7a12c4a9bf1de534ff6ab/ziglang-0.13.0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl
+        let text = indoc! {"
+            Wheel-Version: 1.0
+            Generator: ziglang make_wheels.py
+            Root-Is-Purelib: false
+            Tag:
+              py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64
+        "};
+
+        parse_email_message_file(&mut text.as_bytes(), "WHEEL").unwrap();
     }
 
     #[test]
@@ -996,7 +1031,7 @@ mod test {
         "
         };
         let reader = Cursor::new(wheel.to_string().into_bytes());
-        let wheel_file = parse_key_value_file(reader, "WHEEL")?;
+        let wheel_file = parse_email_message_file(reader, "WHEEL")?;
         assert_eq!(
             wheel_file.get("Wheel-Version"),
             Some(&["1.0".to_string()].to_vec())