Skip to content

Commit

Permalink
Parses wheels WHEEL and METADATA files content as email messages (#6616)
Browse files Browse the repository at this point in the history
## Summary

Fixes: #6615 
Currently, some packages are not installable with `uv`, like `ziglang`
on Linux.
Everything is described in the issue! 😄 

<!-- What's the purpose of the change? What does it do, and why? -->

## Test Plan

<!-- How was it tested? -->
I added a unit test for the problematic use case.
I also checked that the previous unit tests still pass, in order to
ensure backward compatibility.
  • Loading branch information
Coruscant11 authored Aug 25, 2024
1 parent 069b021 commit 2bfc450
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 23 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/install-wheel-rs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ configparser = { workspace = true }
csv = { workspace = true }
data-encoding = { workspace = true }
fs-err = { workspace = true }
mailparse = { workspace = true }
pathdiff = { workspace = true }
platform-info = { workspace = true }
reflink-copy = { workspace = true }
Expand Down
71 changes: 48 additions & 23 deletions crates/install-wheel-rs/src/wheel.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use std::collections::HashMap;
use std::io::{BufRead, BufReader, Cursor, Read, Seek, Write};
use std::io::{BufReader, Cursor, Read, Seek, Write};
use std::path::{Path, PathBuf};
use std::{env, io};

use data_encoding::BASE64URL_NOPAD;
use fs_err as fs;
use fs_err::{DirEntry, File};
use mailparse::parse_headers;
use rustc_hash::FxHashMap;
use sha2::{Digest, Sha256};
use tracing::{instrument, warn};
Expand Down Expand Up @@ -353,10 +354,10 @@ pub enum LibKind {
/// Parse WHEEL file.
///
/// > {distribution}-{version}.dist-info/WHEEL is metadata about the archive itself in the same
/// > basic key: value format:
/// > email message format:
pub fn parse_wheel_file(wheel_text: &str) -> Result<LibKind, Error> {
// {distribution}-{version}.dist-info/WHEEL is metadata about the archive itself in the same basic key: value format:
let data = parse_key_value_file(&mut wheel_text.as_bytes(), "WHEEL")?;
// {distribution}-{version}.dist-info/WHEEL is metadata about the archive itself in the same email message format:
let data = parse_email_message_file(&mut wheel_text.as_bytes(), "WHEEL")?;

// Determine whether Root-Is-Purelib == ‘true’.
// If it is, the wheel is pure, and should be installed into purelib.
Expand Down Expand Up @@ -797,29 +798,39 @@ pub fn read_record_file(record: &mut impl Read) -> Result<Vec<RecordEntry>, Erro
.collect()
}

/// Parse a file with `Key: value` entries such as WHEEL and METADATA
fn parse_key_value_file(
/// Parse a file with email message format such as WHEEL and METADATA
fn parse_email_message_file(
file: impl Read,
debug_filename: &str,
) -> Result<FxHashMap<String, Vec<String>>, Error> {
let mut data: FxHashMap<String, Vec<String>> = FxHashMap::default();

let file = BufReader::new(file);
for (line_no, line) in file.lines().enumerate() {
let line = line?.trim().to_string();
if line.is_empty() {
continue;
let content = file.bytes().collect::<Result<Vec<u8>, _>>()?;

let headers = parse_headers(content.as_slice())
.map_err(|err| {
Error::InvalidWheel(format!("Failed to parse {debug_filename} file: {err}"))
})?
.0;

for header in headers {
let mut name = header.get_key();
let mut value = header.get_value();

// Trim the name and value only if needed, avoiding unnecessary allocations with .trim().to_string().
let trimmed_name = name.trim();
if name == trimmed_name {
name = trimmed_name.to_string();
}
let (key, value) = line.split_once(':').ok_or_else(|| {
Error::InvalidWheel(format!(
"Line {} of the {debug_filename} file is invalid",
line_no + 1
))
})?;
data.entry(key.trim().to_string())
.or_default()
.push(value.trim().to_string());
let trimmed_value = value.trim();
if value == trimmed_value {
value = trimmed_value.to_string();
}

data.entry(name).or_default().push(value);
}

Ok(data)
}

Expand All @@ -836,11 +847,11 @@ mod test {
use crate::Error;

use super::{
get_script_executable, parse_key_value_file, parse_wheel_file, read_record_file, Script,
get_script_executable, parse_email_message_file, parse_wheel_file, read_record_file, Script,
};

#[test]
fn test_parse_key_value_file() {
fn test_parse_email_message_file() {
let text = indoc! {"
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.1)
Expand All @@ -849,7 +860,21 @@ mod test {
Tag: cp38-cp38-manylinux2014_x86_64
"};

parse_key_value_file(&mut text.as_bytes(), "WHEEL").unwrap();
parse_email_message_file(&mut text.as_bytes(), "WHEEL").unwrap();
}

// Regression test for a WHEEL file in which a header value does not follow its key on the same
// line: `Tag:` ends the line and the tag itself sits on the next one.
// NOTE(review): per the test name the continuation line starts with two spaces; confirm against
// the wheel linked below.
#[test]
fn test_parse_email_message_file_with_value_starting_with_linesep_and_two_space() {
// Check: https://files.pythonhosted.org/packages/0c/b7/ecfdce6368cc3664d301f7f52db4fe1004aa7da7a12c4a9bf1de534ff6ab/ziglang-0.13.0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl
let text = indoc! {"
Wheel-Version: 1.0
Generator: ziglang make_wheels.py
Root-Is-Purelib: false
Tag:
py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64
"};

// Only successful parsing is checked; the parsed `Tag` value itself is not asserted.
parse_email_message_file(&mut text.as_bytes(), "WHEEL").unwrap();
}

#[test]
Expand Down Expand Up @@ -996,7 +1021,7 @@ mod test {
"
};
let reader = Cursor::new(wheel.to_string().into_bytes());
let wheel_file = parse_key_value_file(reader, "WHEEL")?;
let wheel_file = parse_email_message_file(reader, "WHEEL")?;
assert_eq!(
wheel_file.get("Wheel-Version"),
Some(&["1.0".to_string()].to_vec())
Expand Down

0 comments on commit 2bfc450

Please sign in to comment.