Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tarball: Use cargo_toml to parse Cargo.toml file #6914

Merged
merged 1 commit into from
Aug 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 20 additions & 15 deletions crates_io_tarball/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ extern crate claims;
#[cfg(any(feature = "builder", test))]
pub use crate::builder::TarballBuilder;
use crate::limit_reader::LimitErrorReader;
pub use crate::manifest::Manifest;
use crate::manifest::validate_manifest;
pub use crate::vcs_info::CargoVcsInfo;
pub use cargo_toml::Manifest;
use flate2::read::GzDecoder;
use std::io::Read;
use std::path::Path;
Expand Down Expand Up @@ -35,7 +36,7 @@ pub enum TarballError {
#[error("Cargo.toml manifest is missing")]
MissingManifest,
#[error("Cargo.toml manifest is invalid: {0}")]
InvalidManifest(#[source] toml::de::Error),
InvalidManifest(#[from] cargo_toml::Error),
#[error(transparent)]
IO(#[from] std::io::Error),
}
Expand Down Expand Up @@ -97,7 +98,11 @@ pub fn process_tarball<R: Read>(
// erroring if it cannot be read.
let mut contents = String::new();
entry.read_to_string(&mut contents)?;
manifest = Some(toml::from_str(&contents).map_err(TarballError::InvalidManifest)?);
manifest = Some({
let manifest = Manifest::from_str(&contents)?;
validate_manifest(&manifest)?;
manifest
});
}
}

Expand Down Expand Up @@ -178,10 +183,10 @@ repository = "https://github.com/foo/bar"
let limit = 512 * 1024 * 1024;

let tarball_info = assert_ok!(process_tarball("foo-0.0.1", &*tarball, limit));
let package = tarball_info.manifest.package;
assert_some_eq!(package.readme.as_path(), Path::new("README.md"));
assert_some_eq!(package.repository, "https://github.com/foo/bar");
assert_some_eq!(package.rust_version, "1.59");
let package = assert_some!(tarball_info.manifest.package);
assert_some_eq!(package.readme().as_path(), Path::new("README.md"));
assert_some_eq!(package.repository(), "https://github.com/foo/bar");
assert_some_eq!(package.rust_version(), "1.59");
}

#[test]
Expand All @@ -200,8 +205,8 @@ repository = "https://github.com/foo/bar"
let limit = 512 * 1024 * 1024;

let tarball_info = assert_ok!(process_tarball("foo-0.0.1", &*tarball, limit));
let package = tarball_info.manifest.package;
assert_some_eq!(package.rust_version, "1.23");
let package = assert_some!(tarball_info.manifest.package);
assert_some_eq!(package.rust_version(), "1.23");
}

#[test]
Expand All @@ -219,8 +224,8 @@ repository = "https://github.com/foo/bar"
let limit = 512 * 1024 * 1024;

let tarball_info = assert_ok!(process_tarball("foo-0.0.1", &*tarball, limit));
let package = tarball_info.manifest.package;
assert_matches!(package.readme, OptionalFile::Flag(true));
let package = assert_some!(tarball_info.manifest.package);
assert_matches!(package.readme(), OptionalFile::Flag(true));
}

#[test]
Expand All @@ -239,8 +244,8 @@ repository = "https://github.com/foo/bar"
let limit = 512 * 1024 * 1024;

let tarball_info = assert_ok!(process_tarball("foo-0.0.1", &*tarball, limit));
let package = tarball_info.manifest.package;
assert_matches!(package.readme, OptionalFile::Flag(false));
let package = assert_some!(tarball_info.manifest.package);
assert_matches!(package.readme(), OptionalFile::Flag(false));
}

#[test]
Expand All @@ -260,7 +265,7 @@ repository = "https://github.com/foo/bar"
let limit = 512 * 1024 * 1024;

let tarball_info = assert_ok!(process_tarball("foo-0.0.1", &*tarball, limit));
let package = tarball_info.manifest.package;
assert_some_eq!(package.repository, "https://github.com/foo/bar");
let package = assert_some!(tarball_info.manifest.package);
assert_some_eq!(package.repository(), "https://github.com/foo/bar");
}
}
129 changes: 87 additions & 42 deletions crates_io_tarball/src/manifest.rs
Original file line number Diff line number Diff line change
@@ -1,44 +1,89 @@
use cargo_toml::OptionalFile;
use derive_deref::Deref;
use serde::{de, Deserialize, Deserializer};

#[derive(Debug, Deserialize)]
pub struct Manifest {
#[serde(alias = "project")]
pub package: Package,
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Package {
pub name: String,
pub version: String,
#[serde(default)]
pub readme: OptionalFile,
pub repository: Option<String>,
pub rust_version: Option<RustVersion>,
}

#[derive(Debug, Deref)]
pub struct RustVersion(String);

impl PartialEq<&str> for RustVersion {
fn eq(&self, other: &&str) -> bool {
self.0.eq(other)
}
}

impl<'de> Deserialize<'de> for RustVersion {
fn deserialize<D: Deserializer<'de>>(d: D) -> Result<RustVersion, D::Error> {
let s = String::deserialize(d)?;
match semver::VersionReq::parse(&s) {
// Exclude semver operators like `^` and pre-release identifiers
Ok(_) if s.chars().all(|c| c.is_ascii_digit() || c == '.') => Ok(RustVersion(s)),
Ok(_) | Err(..) => {
let value = de::Unexpected::Str(&s);
let expected = "a valid rust_version";
Err(de::Error::invalid_value(value, &expected))
}
}
use cargo_toml::{Dependency, DepsSet, Error, Inheritable, Manifest, Package};
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the extra validation worth it?

I'm concerned about us (very easily) adding new things that will then fail on publish. The person publishing would then need to figure out it is a bug in crates.io, report it, someone respond, create a PR upstream with cargo_toml, wait for a fix and publish, then updates and deploy crates.io.

Copy link
Member Author

@Turbo87 Turbo87 Aug 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

our long term goal is to reduce the reliance on the metadata JSON blob that is being sent together with the tarball.

an attacker could potentially publish something where the metadata says foo and the tarball says bar, and we would prefer to treat the tarball as the primary source of truth.

this however requires us to parse the Cargo.toml file and if parsing fails we can't accept the upload. we could keep rolling our own structure definitions, but as we've already seen with the readme field, this is quite error-prone.

as the PR description says cargo_toml works for almost all existing crates, so there isn't really a reason to write everything ourselves.

the extra validation is technically not needed if we assume that only cargo is used to publish new crates, but only relying on client-side validation seems quite risky given scenarios like https://blog.vlt.sh/blog/the-massive-hole-in-the-npm-ecosystem.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be reasonable to add logging / alerting such that we could detect an increase in failed toml parsing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, that seems like a reasonable idea :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm concerned about us (very easily) adding new things that will then fail on publish.

Wouldn't a breaking change like that cause bigger problems anyway, since crates.io is by no means the only downstream parser and consumer of cargo manifests?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not even talking about breaking backwards compatibility but breaking forward compatibility.

Think of cases like:

  • Add new supported values
  • Add new supported types

Cargo needs to have a strict parser for these because an unknown value could have a major impact on behavior. If another tool is acting off of the field, they should hard error as well. However, we need to be able to add new values or else cargo is stuck as-is. This is possible to support in serde using a catch-all variant. cargo_toml instead errors on unknown values.

A trickier one is adding new types which we've done but cargo_metadata also has hard errors for that.

So assuming we only maintain backward compatibility. Most likely we'll be playing whack-a-mole with cargo_toml and cargo_metadata for them to properly handle new values being emitted.

We then have to deal with making sure they are updated when the new values are out (at minimum, a strong communication path).

Tying this back to this PR, making most of cargo_toml loosey-goosey for compatibility removes most of the validation that crates-io can do.

Independent of all of that., in my opinion, is that pushing responsibility for this stuff to third-parties has made it so we are unaware of the problems their users face and it is easier to make innocuous changes that break them


pub fn validate_manifest(manifest: &Manifest) -> Result<(), Error> {
let package = manifest.package.as_ref();

// Check that a `[package]` table exists in the manifest, since crates.io
// does not accept workspace manifests.
let package = package.ok_or(Error::Other("missing field `package`"))?;

validate_package(package)?;

// These checks ensure that dependency workspace inheritance has been
// normalized by cargo before publishing.
if manifest.dependencies.is_inherited()
|| manifest.dev_dependencies.is_inherited()
|| manifest.build_dependencies.is_inherited()
{
return Err(Error::InheritedUnknownValue);
}

Ok(())
}

pub fn validate_package(package: &Package) -> Result<(), Error> {
// These checks ensure that package field workspace inheritance has been
// normalized by cargo before publishing.
if package.edition.is_inherited()
|| package.rust_version.is_inherited()
|| package.version.is_inherited()
|| package.authors.is_inherited()
|| package.description.is_inherited()
|| package.homepage.is_inherited()
|| package.documentation.is_inherited()
|| package.readme.is_inherited()
|| package.keywords.is_inherited()
|| package.categories.is_inherited()
|| package.exclude.is_inherited()
|| package.include.is_inherited()
|| package.license.is_inherited()
|| package.license_file.is_inherited()
|| package.repository.is_inherited()
|| package.publish.is_inherited()
{
return Err(Error::InheritedUnknownValue);
}

// Check that the `rust-version` field has a valid value, if it exists.
if let Some(rust_version) = package.rust_version() {
validate_rust_version(rust_version)?;
}

Ok(())
}

trait IsInherited {
fn is_inherited(&self) -> bool;
}

impl<T> IsInherited for Inheritable<T> {
fn is_inherited(&self) -> bool {
!self.is_set()
}
}

impl<T: IsInherited> IsInherited for Option<T> {
fn is_inherited(&self) -> bool {
self.as_ref().map(|it| it.is_inherited()).unwrap_or(false)
}
}

impl IsInherited for Dependency {
fn is_inherited(&self) -> bool {
matches!(self, Dependency::Inherited(_))
}
}

impl IsInherited for DepsSet {
fn is_inherited(&self) -> bool {
self.iter().any(|(_key, dep)| dep.is_inherited())
}
}

pub fn validate_rust_version(value: &str) -> Result<(), Error> {
match semver::VersionReq::parse(value) {
// Exclude semver operators like `^` and pre-release identifiers
Ok(_) if value.chars().all(|c| c.is_ascii_digit() || c == '.') => Ok(()),
Ok(_) | Err(..) => Err(Error::Other("invalid `rust-version` value")),
}
}
9 changes: 6 additions & 3 deletions src/admin/render_readmes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,14 @@ fn render_pkg_readme<R: Read>(mut archive: Archive<R>, pkg_name: &str) -> anyhow
let contents = find_file_by_path(&mut entries, Path::new(&path))
.context("Failed to read Cargo.toml file")?;

toml::from_str(&contents).context("Failed to parse manifest file")?
Manifest::from_str(&contents).context("Failed to parse manifest file")?

// We don't call `validate_manifest()` here since the additional validation is not needed
// and it would prevent us from reading a couple of legacy crate files.
};

let rendered = {
let readme = manifest.package.readme;
let readme = manifest.package().readme();
if !readme.is_some() {
return Ok("".to_string());
}
Expand All @@ -198,7 +201,7 @@ fn render_pkg_readme<R: Read>(mut archive: Archive<R>, pkg_name: &str) -> anyhow
text_to_html(
&contents,
&readme_path,
manifest.package.repository.as_deref(),
manifest.package().repository(),
pkg_path_in_vcs,
)
};
Expand Down
4 changes: 2 additions & 2 deletions src/controllers/krate/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,8 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra

let rust_version = tarball_info
.manifest
.package
.rust_version
.package()
.rust_version()
.map(|rv| rv.deref().to_string());

// Persist the new version of this crate
Expand Down
40 changes: 37 additions & 3 deletions src/tests/krate/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1291,7 +1291,7 @@ fn invalid_manifest() {
assert_eq!(response.status(), StatusCode::OK);
assert_eq!(
response.into_json(),
json!({ "errors": [{ "detail": "failed to parse `Cargo.toml` manifest file\n\nTOML parse error at line 1, column 1\n |\n1 | \n | ^\nmissing field `package`\n" }] })
json!({ "errors": [{ "detail": "failed to parse `Cargo.toml` manifest file\n\nmissing field `name`\n" }] })
);
}

Expand Down Expand Up @@ -1339,7 +1339,7 @@ fn invalid_rust_version() {
assert_eq!(response.status(), StatusCode::OK);
assert_eq!(
response.into_json(),
json!({ "errors": [{ "detail": "failed to parse `Cargo.toml` manifest file\n\nTOML parse error at line 4, column 16\n |\n4 | rust-version = \"\"\n | ^^\ninvalid value: string \"\", expected a valid rust_version\n" }] })
json!({ "errors": [{ "detail": "failed to parse `Cargo.toml` manifest file\n\ninvalid `rust-version` value" }] })
);

let tarball = TarballBuilder::new("foo", "1.0.0")
Expand All @@ -1352,6 +1352,40 @@ fn invalid_rust_version() {
assert_eq!(response.status(), StatusCode::OK);
assert_eq!(
response.into_json(),
json!({ "errors": [{ "detail": "failed to parse `Cargo.toml` manifest file\n\nTOML parse error at line 4, column 16\n |\n4 | rust-version = \"1.0.0-beta.2\"\n | ^^^^^^^^^^^^^^\ninvalid value: string \"1.0.0-beta.2\", expected a valid rust_version\n" }] })
json!({ "errors": [{ "detail": "failed to parse `Cargo.toml` manifest file\n\ninvalid `rust-version` value" }] })
);
}

#[test]
fn workspace_inheritance() {
let (_app, _anon, _cookie, token) = TestApp::full().with_token();

let tarball = TarballBuilder::new("foo", "1.0.0")
.add_raw_manifest(b"[package]\nname = \"foo\"\nversion.workspace = true\n")
.build();

let response = token.publish_crate(PublishBuilder::new("foo", "1.0.0").tarball(tarball));
assert_eq!(response.status(), StatusCode::OK);
assert_eq!(
response.into_json(),
json!({ "errors": [{ "detail": "failed to parse `Cargo.toml` manifest file\n\nvalue from workspace hasn't been set" }] })
);
}

#[test]
fn workspace_inheritance_with_dep() {
let (_app, _anon, _cookie, token) = TestApp::full().with_token();

let tarball = TarballBuilder::new("foo", "1.0.0")
.add_raw_manifest(
b"[package]\nname = \"foo\"\nversion = \"1.0.0\"\n\n[dependencies]\nserde.workspace = true\n",
)
.build();

let response = token.publish_crate(PublishBuilder::new("foo", "1.0.0").tarball(tarball));
assert_eq!(response.status(), StatusCode::OK);
assert_eq!(
response.into_json(),
json!({ "errors": [{ "detail": "failed to parse `Cargo.toml` manifest file\n\nvalue from workspace hasn't been set" }] })
);
}