Skip to content

Commit

Permalink
Update crates.io index when necessary to avoid errors
Browse files Browse the repository at this point in the history
If we would generate an error due to an audit-as-crates-io policy being
specified for a crate which doesn't appear to be in the index, it may be
because the local copy of the index is out-of-date.

With these changes, if we notice the index is out of date in this way,
we'll update our local copy of the index and re-run our checks.

This should avoid potential issues with new audits being added for
audit-as-crates-io = true crates reporting errors on machines which have
an out-of-date copy of the index.
  • Loading branch information
mystor committed Oct 14, 2022
1 parent 7257bfc commit 2996e1a
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 78 deletions.
112 changes: 64 additions & 48 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2027,62 +2027,78 @@ fn first_party_packages_strict<'a>(
}

fn check_audit_as_crates_io(cfg: &Config, store: &Store) -> Result<(), AuditAsErrors> {
let cache = Cache::acquire(cfg).map_err(|e| AuditAsErrors {
let mut cache = Cache::acquire(cfg).map_err(|e| AuditAsErrors {
errors: vec![AuditAsError::CacheAcquire(e)],
})?;
let mut needs_audit_as_entry = vec![];
let mut shouldnt_be_audit_as = vec![];

'packages: for package in first_party_packages_strict(&cfg.metadata, &store.config) {
let audit_policy = store
.config
.policy
.get(&package.name)
.and_then(|policy| policy.audit_as_crates_io);
if audit_policy == Some(false) {
// They've explicitly said this is first-party so we don't care about what's in the registry
continue;
}

if let Some(index_entry) = cache.query_package_from_index(&package.name) {
if storage::exact_version(&index_entry, &package.version).is_some() {
// We found a version of this package in the registry!
if audit_policy == None {
// At this point, having no policy is an error
needs_audit_as_entry.push(NeedsAuditAsError {
package: package.name.clone(),
version: package.version.clone(),
});
// If we don't have a registry, we can't check audit-as-crates-io.
if !cache.has_registry() {
return Ok(());
}

'retry: loop {
let mut needs_audit_as_entry = vec![];
let mut shouldnt_be_audit_as = vec![];

'packages: for package in first_party_packages_strict(&cfg.metadata, &store.config) {
let audit_policy = store
.config
.policy
.get(&package.name)
.and_then(|policy| policy.audit_as_crates_io);
if audit_policy == Some(false) {
// They've explicitly said this is first-party so we don't care about what's in the registry
continue;
}

if let Some(index_entry) = cache.query_package_from_index(&package.name) {
if storage::exact_version(&index_entry, &package.version).is_some() {
// We found a version of this package in the registry!
if audit_policy == None {
// At this point, having no policy is an error
needs_audit_as_entry.push(NeedsAuditAsError {
package: package.name.clone(),
version: package.version.clone(),
});
}
// Now that we've found a version match, we're done with this package
continue 'packages;
}
// Now that we've found a version match, we're done with this package
continue 'packages;
}
}

// If we reach this point, then we couldn't find a matching package in the registry,
// So any `audit-as-crates-io = true` is an error that should be corrected
if audit_policy == Some(true) {
shouldnt_be_audit_as.push(ShouldntBeAuditAsError {
package: package.name.clone(),
version: package.version.clone(),
});
}
}
// If we reach this point, then we couldn't find a matching package in the registry,
// So any `audit-as-crates-io = true` is an error that should be corrected
if audit_policy == Some(true) {
// We found a crate which someone thought was on crates-io, but
// doesn't appear to be according to our local index.
// Before reporting an error, ensure the index is up to date,
// and restart if it was not.
if cache.ensure_index_up_to_date() {
continue 'retry;
}

if !needs_audit_as_entry.is_empty() || !shouldnt_be_audit_as.is_empty() {
let mut errors = vec![];
if !needs_audit_as_entry.is_empty() {
errors.push(AuditAsError::NeedsAuditAs(NeedsAuditAsErrors {
errors: needs_audit_as_entry,
}));
shouldnt_be_audit_as.push(ShouldntBeAuditAsError {
package: package.name.clone(),
version: package.version.clone(),
});
}
}
if !shouldnt_be_audit_as.is_empty() {
errors.push(AuditAsError::ShouldntBeAuditAs(ShouldntBeAuditAsErrors {
errors: shouldnt_be_audit_as,
}));

if !needs_audit_as_entry.is_empty() || !shouldnt_be_audit_as.is_empty() {
let mut errors = vec![];
if !needs_audit_as_entry.is_empty() {
errors.push(AuditAsError::NeedsAuditAs(NeedsAuditAsErrors {
errors: needs_audit_as_entry,
}));
}
if !shouldnt_be_audit_as.is_empty() {
errors.push(AuditAsError::ShouldntBeAuditAs(ShouldntBeAuditAsErrors {
errors: shouldnt_be_audit_as,
}));
}
return Err(AuditAsErrors { errors });
}
return Err(AuditAsErrors { errors });
}

Ok(())
return Ok(());
}
}
79 changes: 55 additions & 24 deletions src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use std::{
};

use cargo_metadata::Version;
use crates_index::Index;
use flate2::read::GzDecoder;
use futures_util::future::try_join_all;
use miette::{NamedSource, SourceOffset};
Expand All @@ -33,12 +32,20 @@ use crate::{
SortedMap, SAFE_TO_DEPLOY, SAFE_TO_RUN,
},
network::Network,
out::{progress_bar, IncProgressOnDrop},
out::{indeterminate_spinner, progress_bar, IncProgressOnDrop},
resolver::{self, Conclusion},
serialization::{spanned::Spanned, to_formatted_toml},
Config, PartialConfig,
};

/// The type to use for accessing information from crates.io.
#[cfg(not(test))]
type CratesIndex = crates_index::Index;

/// When running tests, a mock index is used instead of the real one.
#[cfg(test)]
type CratesIndex = crate::tests::MockIndex;

// tmp cache for various shenanigans
const CACHE_DIFF_CACHE: &str = "diff-cache.toml";
const CACHE_COMMAND_HISTORY: &str = "command-history.json";
Expand Down Expand Up @@ -818,11 +825,13 @@ async fn fetch_imported_audit(
/// A Registry in CARGO_HOME (usually the crates.io one)
pub struct CargoRegistry {
/// The queryable index
index: Index,
index: CratesIndex,
/// The base path all registries share (`$CARGO_HOME/registry`)
base_dir: PathBuf,
/// The name of the registry (`github.com-1ecc6299db9ec823`)
registry: OsString,
/// Whether or not the index is known to be up-to-date
index_up_to_date: bool,
}

impl CargoRegistry {
Expand Down Expand Up @@ -905,12 +914,19 @@ impl Drop for Cache {
impl Cache {
/// Acquire the cache
pub fn acquire(cfg: &PartialConfig) -> Result<Self, CacheAcquireError> {
// Try to get the cargo registry
let cargo_registry = find_cargo_registry();
if let Err(e) = &cargo_registry {
// ERRORS: this warning really rides the line, I'm not sure if the user can/should care
warn!("Couldn't find cargo registry: {e}");
}

if cfg.mock_cache {
// We're in unit tests, everything should be mocked and not touch real caches
return Ok(Cache {
_lock: None,
root: None,
cargo_registry: None,
cargo_registry: cargo_registry.ok(),
diff_cache_path: None,
command_history_path: None,
diff_semaphore: tokio::sync::Semaphore::new(MAX_CONCURRENT_DIFFS),
Expand Down Expand Up @@ -968,13 +984,6 @@ impl Cache {
.and_then(|f| load_json(f).ok())
.unwrap_or_default();

// Try to get the cargo registry
let cargo_registry = find_cargo_registry();
if let Err(e) = &cargo_registry {
// ERRORS: this warning really rides the line, I'm not sure if the user can/should care
warn!("Couldn't find cargo registry: {e}");
}

Ok(Self {
_lock: Some(lock),
root: Some(root),
Expand All @@ -991,27 +1000,48 @@ impl Cache {
})
}

/// Check if the Cache has access to the registry or information about the
/// crates.io index.
pub fn has_registry(&self) -> bool {
self.cargo_registry.is_some()
}

/// Ensures that the local copy of the crates.io index has the most
/// up-to-date information about what crates are available.
///
/// Returns `true` if the state of the index may have been changed by this
/// call, and `false` if the index is already up-to-date.
pub fn ensure_index_up_to_date(&mut self) -> bool {
let reg = match &mut self.cargo_registry {
Some(reg) => reg,
None => return false,
};
if reg.index_up_to_date {
return false;
}
let _spinner = indeterminate_spinner("Updating", "registry index");
reg.index_up_to_date = true;
match reg.index.update() {
Ok(()) => true,
Err(e) => {
warn!("Couldn't update cargo index: {e}");
false
}
}
}

/// Gets any information the crates.io index has on this package, locally
/// with no downloads. The fact that we invoke `cargo metadata` on startup
/// means the index should be as populated as we're able to get it.
/// with no downloads. The index may be out of date, however a caller can
/// use `ensure_index_up_to_date` to make sure it is up to date before
/// calling this method.
///
/// However this may do some expensive disk i/o, so ideally we should do
/// some bulk processing of this later. For now let's get it working...
#[cfg(not(test))]
pub fn query_package_from_index(&self, name: PackageStr) -> Option<crates_index::Crate> {
let reg = self.cargo_registry.as_ref()?;
reg.index.crate_(name)
}

#[cfg(test)]
pub fn query_package_from_index(&self, name: PackageStr) -> Option<crates_index::Crate> {
if let Some(reg) = self.cargo_registry.as_ref() {
reg.index.crate_(name)
} else {
crate::tests::MockRegistry::testing_cinematic_universe().package(name)
}
}

#[tracing::instrument(skip(self, network), err)]
pub async fn fetch_package(
&self,
Expand Down Expand Up @@ -1545,7 +1575,7 @@ fn find_cargo_registry() -> Result<CargoRegistry, crates_index::Error> {
// ERRORS: all of this is genuinely fallible internal workings
// but if these path adjustments don't work then something is very fundamentally wrong

let index = Index::new_cargo_default()?;
let index = CratesIndex::new_cargo_default()?;

let base_dir = index.path().parent().unwrap().parent().unwrap().to_owned();
let registry = index.path().file_name().unwrap().to_owned();
Expand All @@ -1554,6 +1584,7 @@ fn find_cargo_registry() -> Result<CargoRegistry, crates_index::Error> {
index,
base_dir,
registry,
index_up_to_date: false,
})
}

Expand Down
22 changes: 22 additions & 0 deletions src/tests/audit_as_crates_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,25 @@ fn cycle_audit_as_crates_io() {
let output = get_audit_as_crates_io(&cfg, &store);
insta::assert_snapshot!("cycle-audit-as-crates-io", output);
}

#[test]
fn audit_as_crates_io_need_update() {
// The "root" and "first" packages don't have their versions on crates.io
// until the index is updated. If an `audit_as_crates_io` entry requires
// `first` to be on crates.io, and it's not, we'll do an update, which
// should reveal that `root` also needs to be audited.

let _enter = TEST_RUNTIME.enter();

let mock = MockMetadata::haunted_tree();
let metadata = mock.metadata();
let (mut config, audits, imports) = builtin_files_full_audited(&metadata);
config
.policy
.insert("first".to_owned(), audit_as_policy(Some(true)));
let store = Store::mock(config, audits, imports);
let cfg = mock_cfg(&metadata);

let output = get_audit_as_crates_io(&cfg, &store);
insta::assert_snapshot!("audit-as-crates-io-need-update", output);
}
33 changes: 27 additions & 6 deletions src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{
collections::BTreeMap,
ffi::OsString,
fmt, fs, io,
path::PathBuf,
path::{Path, PathBuf},
sync::{Arc, Mutex},
};

Expand Down Expand Up @@ -110,8 +110,9 @@ struct MockDependency {
version: Version,
}

pub struct MockRegistry {
pub struct MockIndex {
packages: FastMap<PackageStr<'static>, Vec<MockRegistryVersion>>,
path: PathBuf,
}

struct MockRegistryVersion {
Expand All @@ -127,9 +128,12 @@ fn reg_ver(pub_ver: u64) -> MockRegistryVersion {
}
}

impl MockRegistry {
pub fn testing_cinematic_universe() -> Self {
Self {
// The `MockIndex` type should provide the same interface as the
// `crates_index::Index` type, as it is used in place of that type in test
// builds.
impl MockIndex {
pub fn new_cargo_default() -> Result<Self, crates_index::Error> {
Ok(Self {
packages: [
("root-package", vec![reg_ver(DEFAULT_VER)]),
("third-party1", vec![reg_ver(DEFAULT_VER)]),
Expand Down Expand Up @@ -157,12 +161,26 @@ impl MockRegistry {
("dev-cycle-indirect", vec![reg_ver(DEFAULT_VER)]),
("third-normal", vec![reg_ver(DEFAULT_VER)]),
("third-dev", vec![reg_ver(DEFAULT_VER)]),
("first", vec![reg_ver(1)]),
]
.into_iter()
.collect(),
path: Path::new(env!("CARGO_MANIFEST_DIR"))
.join("/tests/mock-registry/index/testing-cinematic-universe"),
})
}
pub fn update(&mut self) -> Result<(), crates_index::Error> {
if self.packages.contains_key("new-package") {
return Ok(()); // already updated
}
self.packages.insert("root", vec![reg_ver(DEFAULT_VER)]);
self.packages
.get_mut("first")
.unwrap()
.push(reg_ver(DEFAULT_VER));
Ok(())
}
pub fn package(&self, name: PackageStr) -> Option<crates_index::Crate> {
pub fn crate_(&self, name: PackageStr) -> Option<crates_index::Crate> {
use std::io::Write;

let package_entry = self.packages.get(name)?;
Expand Down Expand Up @@ -222,6 +240,9 @@ impl MockRegistry {
.expect("failed to parse mock crates index file");
Some(result)
}
pub fn path(&self) -> &Path {
&self.path
}
}

impl Default for MockPackage {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
source: src/tests/audit_as_crates_io.rs
expression: output
---

× There are some issues with your policy.audit-as-crates-io entries

Error:
× Some non-crates.io-fetched packages match published crates.io versions
root:10.0.0
help: Add a `policy.*.audit-as-crates-io` entry for them

0 comments on commit 2996e1a

Please sign in to comment.