Skip to content

Commit

Permalink
Parse the wheel's WHEEL and METADATA files as email messages
Browse files Browse the repository at this point in the history
Fixes some wheel parsing crashes, such as the one triggered by the ziglang
package on Linux.
  • Loading branch information
Coruscant11 committed Aug 25, 2024
1 parent 1c58072 commit d4bcc49
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 23 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/install-wheel-rs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ configparser = { workspace = true }
csv = { workspace = true }
data-encoding = { workspace = true }
fs-err = { workspace = true }
mailparse = { workspace = true }
pathdiff = { workspace = true }
platform-info = { workspace = true }
reflink-copy = { workspace = true }
Expand Down
71 changes: 48 additions & 23 deletions crates/install-wheel-rs/src/wheel.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use std::collections::HashMap;
use std::io::{BufRead, BufReader, Cursor, Read, Seek, Write};
use std::io::{BufReader, Cursor, Read, Seek, Write};
use std::path::{Path, PathBuf};
use std::{env, io};

use data_encoding::BASE64URL_NOPAD;
use fs_err as fs;
use fs_err::{DirEntry, File};
use mailparse::parse_headers;
use rustc_hash::FxHashMap;
use sha2::{Digest, Sha256};
use tracing::{instrument, warn};
Expand Down Expand Up @@ -353,10 +354,10 @@ pub enum LibKind {
/// Parse WHEEL file.
///
/// > {distribution}-{version}.dist-info/WHEEL is metadata about the archive itself in the same
/// > basic key: value format:
/// > email message format:
pub fn parse_wheel_file(wheel_text: &str) -> Result<LibKind, Error> {
// {distribution}-{version}.dist-info/WHEEL is metadata about the archive itself in the same basic key: value format:
let data = parse_key_value_file(&mut wheel_text.as_bytes(), "WHEEL")?;
// {distribution}-{version}.dist-info/WHEEL is metadata about the archive itself in the same email message format:
let data = parse_email_message_file(&mut wheel_text.as_bytes(), "WHEEL")?;

// Determine whether Root-Is-Purelib == ‘true’.
// If it is, the wheel is pure, and should be installed into purelib.
Expand Down Expand Up @@ -797,29 +798,39 @@ pub fn read_record_file(record: &mut impl Read) -> Result<Vec<RecordEntry>, Erro
.collect()
}

/// Parse a file with `Key: value` entries such as WHEEL and METADATA
fn parse_key_value_file(
/// Parse a file with email message format such as WHEEL and METADATA
fn parse_email_message_file(
file: impl Read,
debug_filename: &str,
) -> Result<FxHashMap<String, Vec<String>>, Error> {
let mut data: FxHashMap<String, Vec<String>> = FxHashMap::default();

let file = BufReader::new(file);
for (line_no, line) in file.lines().enumerate() {
let line = line?.trim().to_string();
if line.is_empty() {
continue;
let content = file.bytes().collect::<Result<Vec<u8>, _>>()?;

let headers = parse_headers(content.as_slice())
.map_err(|err| {
Error::InvalidWheel(format!("Failed to parse {debug_filename} file: {err}"))
})?
.0;

for header in headers {
let mut name = header.get_key();
let mut value = header.get_value();

// Trim the name and value only if needed, avoiding unnecessary allocations with .trim().to_string().
let trimmed_name = name.trim();
if name == trimmed_name {
name = trimmed_name.to_string();
}
let (key, value) = line.split_once(':').ok_or_else(|| {
Error::InvalidWheel(format!(
"Line {} of the {debug_filename} file is invalid",
line_no + 1
))
})?;
data.entry(key.trim().to_string())
.or_default()
.push(value.trim().to_string());
let trimmed_value = value.trim();
if value == trimmed_value {
value = trimmed_value.to_string();
}

data.entry(name).or_default().push(value);
}

Ok(data)
}

Expand All @@ -836,11 +847,11 @@ mod test {
use crate::Error;

use super::{
get_script_executable, parse_key_value_file, parse_wheel_file, read_record_file, Script,
get_script_executable, parse_email_message_file, parse_wheel_file, read_record_file, Script,
};

#[test]
fn test_parse_key_value_file() {
fn test_parse_email_message_file() {
let text = indoc! {"
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.1)
Expand All @@ -849,7 +860,21 @@ mod test {
Tag: cp38-cp38-manylinux2014_x86_64
"};

parse_key_value_file(&mut text.as_bytes(), "WHEEL").unwrap();
parse_email_message_file(&mut text.as_bytes(), "WHEEL").unwrap();
}

#[test]
fn test_parse_email_message_file_with_value_starting_with_linesep_and_two_space() {
    // The `Tag` value starts on the line after the key, as a continuation line indented by
    // two spaces relative to the other headers. `indoc!` strips only the common indentation,
    // so the extra two spaces below are part of the fixture and must be kept.
    // Check: https://files.pythonhosted.org/packages/0c/b7/ecfdce6368cc3664d301f7f52db4fe1004aa7da7a12c4a9bf1de534ff6ab/ziglang-0.13.0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl
    let text = indoc! {"
        Wheel-Version: 1.0
        Generator: ziglang make_wheels.py
        Root-Is-Purelib: false
        Tag:
          py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64
    "};

    parse_email_message_file(&mut text.as_bytes(), "WHEEL").unwrap();
}

#[test]
Expand Down Expand Up @@ -996,7 +1021,7 @@ mod test {
"
};
let reader = Cursor::new(wheel.to_string().into_bytes());
let wheel_file = parse_key_value_file(reader, "WHEEL")?;
let wheel_file = parse_email_message_file(reader, "WHEEL")?;
assert_eq!(
wheel_file.get("Wheel-Version"),
Some(&["1.0".to_string()].to_vec())
Expand Down

0 comments on commit d4bcc49

Please sign in to comment.