Skip to content

Commit

Permalink
Update crates.io index when necessary to avoid errors
Browse files Browse the repository at this point in the history
If we would generate an error due to an audit-as-crates-io policy being
specified for a crate which doesn't appear to be in the index, it may be
because the local copy of the index is out-of-date.

With these changes, if we notice the index is out of date in this way,
we'll update our local copy of the index and re-run our checks.

This should avoid potential issues with new audits being added for
audit-as-crates-io = true crates reporting errors on machines which have
an out-of-date copy of the index.
  • Loading branch information
mystor committed Oct 20, 2022
1 parent 7257bfc commit 1522808
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 45 deletions.
4 changes: 0 additions & 4 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ pub enum AuditAsError {
#[error(transparent)]
#[diagnostic(transparent)]
ShouldntBeAuditAs(ShouldntBeAuditAsErrors),
// FIXME: we should probably just make the caller pass this in?
#[error(transparent)]
#[diagnostic(transparent)]
CacheAcquire(CacheAcquireError),
}

#[derive(Debug, Error, Diagnostic)]
Expand Down
33 changes: 24 additions & 9 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1342,7 +1342,8 @@ fn fix_audit_as(cfg: &Config, store: &mut Store) -> Result<(), CacheAcquireError
// NOTE: In the future this might require Network, but for now `cargo metadata` is a precondition
// and guarantees a fully populated and up to date index, so we can just rely on that and know
// this is Networkless.
let issues = check_audit_as_crates_io(cfg, store);
let mut cache = Cache::acquire(cfg)?;
let issues = check_audit_as_crates_io(cfg, store, &mut cache);
if let Err(AuditAsErrors { errors }) = issues {
for error in errors {
match error {
Expand All @@ -1366,7 +1367,6 @@ fn fix_audit_as(cfg: &Config, store: &mut Store) -> Result<(), CacheAcquireError
.audit_as_crates_io = Some(false);
}
}
AuditAsError::CacheAcquire(err) => return Err(err),
}
}
}
Expand Down Expand Up @@ -1487,7 +1487,8 @@ fn cmd_check(out: &Arc<dyn Out>, cfg: &Config, sub_args: &CheckArgs) -> Result<(

if !cfg.cli.locked {
// Check if any of our first-parties are in the crates.io registry
check_audit_as_crates_io(cfg, &store)?;
let mut cache = Cache::acquire(cfg).into_diagnostic()?;
check_audit_as_crates_io(cfg, &store, &mut cache)?;
}

// DO THE THING!!!!
Expand Down Expand Up @@ -2026,14 +2027,20 @@ fn first_party_packages_strict<'a>(
.filter(move |package| !package.is_third_party(&empty_policy))
}

fn check_audit_as_crates_io(cfg: &Config, store: &Store) -> Result<(), AuditAsErrors> {
let cache = Cache::acquire(cfg).map_err(|e| AuditAsErrors {
errors: vec![AuditAsError::CacheAcquire(e)],
})?;
fn check_audit_as_crates_io(
cfg: &Config,
store: &Store,
cache: &mut Cache,
) -> Result<(), AuditAsErrors> {
// If we don't have a registry, we can't check audit-as-crates-io.
if !cache.has_registry() {
return Ok(());
}

let mut needs_audit_as_entry = vec![];
let mut shouldnt_be_audit_as = vec![];

'packages: for package in first_party_packages_strict(&cfg.metadata, &store.config) {
for package in first_party_packages_strict(&cfg.metadata, &store.config) {
let audit_policy = store
.config
.policy
Expand All @@ -2055,13 +2062,21 @@ fn check_audit_as_crates_io(cfg: &Config, store: &Store) -> Result<(), AuditAsEr
});
}
// Now that we've found a version match, we're done with this package
continue 'packages;
continue;
}
}

// If we reach this point, then we couldn't find a matching package in the registry,
// So any `audit-as-crates-io = true` is an error that should be corrected
if audit_policy == Some(true) {
// We found a crate which someone thought was on crates-io, but
// doesn't appear to be according to our local index.
// Before reporting an error, ensure the index is up to date,
// and restart if it was not.
if cache.ensure_index_up_to_date() {
return check_audit_as_crates_io(cfg, store, cache);
}

shouldnt_be_audit_as.push(ShouldntBeAuditAsError {
package: package.name.clone(),
version: package.version.clone(),
Expand Down
79 changes: 55 additions & 24 deletions src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use std::{
};

use cargo_metadata::Version;
use crates_index::Index;
use flate2::read::GzDecoder;
use futures_util::future::try_join_all;
use miette::{NamedSource, SourceOffset};
Expand All @@ -33,12 +32,20 @@ use crate::{
SortedMap, SAFE_TO_DEPLOY, SAFE_TO_RUN,
},
network::Network,
out::{progress_bar, IncProgressOnDrop},
out::{indeterminate_spinner, progress_bar, IncProgressOnDrop},
resolver::{self, Conclusion},
serialization::{spanned::Spanned, to_formatted_toml},
Config, PartialConfig,
};

/// The type to use for accessing information from crates.io.
#[cfg(not(test))]
type CratesIndex = crates_index::Index;

/// When running tests, a mock index is used instead of the real one.
#[cfg(test)]
type CratesIndex = crate::tests::MockIndex;

// tmp cache for various shenanigans
const CACHE_DIFF_CACHE: &str = "diff-cache.toml";
const CACHE_COMMAND_HISTORY: &str = "command-history.json";
Expand Down Expand Up @@ -818,11 +825,13 @@ async fn fetch_imported_audit(
/// A Registry in CARGO_HOME (usually the crates.io one)
pub struct CargoRegistry {
/// The queryable index
index: Index,
index: CratesIndex,
/// The base path all registries share (`$CARGO_HOME/registry`)
base_dir: PathBuf,
/// The name of the registry (`github.com-1ecc6299db9ec823`)
registry: OsString,
/// Whether or not the index is known to be up-to-date
index_up_to_date: bool,
}

impl CargoRegistry {
Expand Down Expand Up @@ -905,12 +914,19 @@ impl Drop for Cache {
impl Cache {
/// Acquire the cache
pub fn acquire(cfg: &PartialConfig) -> Result<Self, CacheAcquireError> {
// Try to get the cargo registry
let cargo_registry = find_cargo_registry();
if let Err(e) = &cargo_registry {
// ERRORS: this warning really rides the line, I'm not sure if the user can/should care
warn!("Couldn't find cargo registry: {e}");
}

if cfg.mock_cache {
// We're in unit tests, everything should be mocked and not touch real caches
return Ok(Cache {
_lock: None,
root: None,
cargo_registry: None,
cargo_registry: cargo_registry.ok(),
diff_cache_path: None,
command_history_path: None,
diff_semaphore: tokio::sync::Semaphore::new(MAX_CONCURRENT_DIFFS),
Expand Down Expand Up @@ -968,13 +984,6 @@ impl Cache {
.and_then(|f| load_json(f).ok())
.unwrap_or_default();

// Try to get the cargo registry
let cargo_registry = find_cargo_registry();
if let Err(e) = &cargo_registry {
// ERRORS: this warning really rides the line, I'm not sure if the user can/should care
warn!("Couldn't find cargo registry: {e}");
}

Ok(Self {
_lock: Some(lock),
root: Some(root),
Expand All @@ -991,27 +1000,48 @@ impl Cache {
})
}

/// Check if the Cache has access to the registry or information about the
/// crates.io index.
pub fn has_registry(&self) -> bool {
self.cargo_registry.is_some()
}

/// Ensures that the local copy of the crates.io index has the most
/// up-to-date information about what crates are available.
///
/// Returns `true` if the state of the index may have been changed by this
/// call, and `false` if the index is already up-to-date.
pub fn ensure_index_up_to_date(&mut self) -> bool {
let reg = match &mut self.cargo_registry {
Some(reg) => reg,
None => return false,
};
if reg.index_up_to_date {
return false;
}
let _spinner = indeterminate_spinner("Updating", "registry index");
reg.index_up_to_date = true;
match reg.index.update() {
Ok(()) => true,
Err(e) => {
warn!("Couldn't update cargo index: {e}");
false
}
}
}

/// Gets any information the crates.io index has on this package, locally
/// with no downloads. The fact that we invoke `cargo metadata` on startup
/// means the index should be as populated as we're able to get it.
/// with no downloads. The index may be out of date, however a caller can
/// use `ensure_index_up_to_date` to make sure it is up to date before
/// calling this method.
///
/// However this may do some expensive disk i/o, so ideally we should do
/// some bulk processing of this later. For now let's get it working...
#[cfg(not(test))]
pub fn query_package_from_index(&self, name: PackageStr) -> Option<crates_index::Crate> {
let reg = self.cargo_registry.as_ref()?;
reg.index.crate_(name)
}

#[cfg(test)]
pub fn query_package_from_index(&self, name: PackageStr) -> Option<crates_index::Crate> {
if let Some(reg) = self.cargo_registry.as_ref() {
reg.index.crate_(name)
} else {
crate::tests::MockRegistry::testing_cinematic_universe().package(name)
}
}

#[tracing::instrument(skip(self, network), err)]
pub async fn fetch_package(
&self,
Expand Down Expand Up @@ -1545,7 +1575,7 @@ fn find_cargo_registry() -> Result<CargoRegistry, crates_index::Error> {
// ERRORS: all of this is genuinely fallible internal workings
// but if these path adjustments don't work then something is very fundamentally wrong

let index = Index::new_cargo_default()?;
let index = CratesIndex::new_cargo_default()?;

let base_dir = index.path().parent().unwrap().parent().unwrap().to_owned();
let registry = index.path().file_name().unwrap().to_owned();
Expand All @@ -1554,6 +1584,7 @@ fn find_cargo_registry() -> Result<CargoRegistry, crates_index::Error> {
index,
base_dir,
registry,
index_up_to_date: false,
})
}

Expand Down
28 changes: 26 additions & 2 deletions src/tests/audit_as_crates_io.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
use super::*;

fn get_audit_as_crates_io(cfg: &Config, store: &Store) -> String {
let res = crate::check_audit_as_crates_io(cfg, store);
let mut cache = crate::storage::Cache::acquire(cfg).unwrap();
let res = crate::check_audit_as_crates_io(cfg, store, &mut cache);
match res {
Ok(()) => String::new(),
Err(e) => format!("{:?}", miette::Report::new(e)),
}
}

fn get_audit_as_crates_io_json(cfg: &Config, store: &Store) -> String {
let res = crate::check_audit_as_crates_io(cfg, store);
let mut cache = crate::storage::Cache::acquire(cfg).unwrap();
let res = crate::check_audit_as_crates_io(cfg, store, &mut cache);
match res {
Ok(()) => String::new(),
Err(e) => {
Expand Down Expand Up @@ -232,3 +234,25 @@ fn cycle_audit_as_crates_io() {
let output = get_audit_as_crates_io(&cfg, &store);
insta::assert_snapshot!("cycle-audit-as-crates-io", output);
}

#[test]
fn audit_as_crates_io_need_update() {
// The "root" and "first" packages don't have their versions on crates.io
// until the index is updated. If an `audit_as_crates_io` entry requires
// `first` to be on crates.io, and it's not, we'll do an update, which
// should reveal that `root` also needs to be audited.

let _enter = TEST_RUNTIME.enter();

let mock = MockMetadata::haunted_tree();
let metadata = mock.metadata();
let (mut config, audits, imports) = builtin_files_full_audited(&metadata);
config
.policy
.insert("first".to_owned(), audit_as_policy(Some(true)));
let store = Store::mock(config, audits, imports);
let cfg = mock_cfg(&metadata);

let output = get_audit_as_crates_io(&cfg, &store);
insta::assert_snapshot!("audit-as-crates-io-need-update", output);
}
33 changes: 27 additions & 6 deletions src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{
collections::BTreeMap,
ffi::OsString,
fmt, fs, io,
path::PathBuf,
path::{Path, PathBuf},
sync::{Arc, Mutex},
};

Expand Down Expand Up @@ -110,8 +110,9 @@ struct MockDependency {
version: Version,
}

pub struct MockRegistry {
pub struct MockIndex {
packages: FastMap<PackageStr<'static>, Vec<MockRegistryVersion>>,
path: PathBuf,
}

struct MockRegistryVersion {
Expand All @@ -127,9 +128,12 @@ fn reg_ver(pub_ver: u64) -> MockRegistryVersion {
}
}

impl MockRegistry {
pub fn testing_cinematic_universe() -> Self {
Self {
// The `MockIndex` type should provide the same interface as the
// `crates_index::Index` type, as it is used in place of that type in test
// builds.
impl MockIndex {
pub fn new_cargo_default() -> Result<Self, crates_index::Error> {
Ok(Self {
packages: [
("root-package", vec![reg_ver(DEFAULT_VER)]),
("third-party1", vec![reg_ver(DEFAULT_VER)]),
Expand Down Expand Up @@ -157,12 +161,26 @@ impl MockRegistry {
("dev-cycle-indirect", vec![reg_ver(DEFAULT_VER)]),
("third-normal", vec![reg_ver(DEFAULT_VER)]),
("third-dev", vec![reg_ver(DEFAULT_VER)]),
("first", vec![reg_ver(1)]),
]
.into_iter()
.collect(),
path: Path::new(env!("CARGO_MANIFEST_DIR"))
.join("/tests/mock-registry/index/testing-cinematic-universe"),
})
}
pub fn update(&mut self) -> Result<(), crates_index::Error> {
if self.packages.contains_key("new-package") {
return Ok(()); // already updated
}
self.packages.insert("root", vec![reg_ver(DEFAULT_VER)]);
self.packages
.get_mut("first")
.unwrap()
.push(reg_ver(DEFAULT_VER));
Ok(())
}
pub fn package(&self, name: PackageStr) -> Option<crates_index::Crate> {
pub fn crate_(&self, name: PackageStr) -> Option<crates_index::Crate> {
use std::io::Write;

let package_entry = self.packages.get(name)?;
Expand Down Expand Up @@ -222,6 +240,9 @@ impl MockRegistry {
.expect("failed to parse mock crates index file");
Some(result)
}
pub fn path(&self) -> &Path {
&self.path
}
}

impl Default for MockPackage {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
source: src/tests/audit_as_crates_io.rs
expression: output
---

× There are some issues with your policy.audit-as-crates-io entries

Error:
× Some non-crates.io-fetched packages match published crates.io versions
root:10.0.0
help: Add a `policy.*.audit-as-crates-io` entry for them

0 comments on commit 1522808

Please sign in to comment.