Skip to content

Commit

Permalink
Add basic db <-> index consistency check
Browse files Browse the repository at this point in the history
For now this only checks whether any crates/versions are missing, but it can
easily be expanded to validate other details.
  • Loading branch information
Nemo157 authored and Joshua Nelson committed Aug 14, 2020
1 parent e6064a6 commit 78527ca
Show file tree
Hide file tree
Showing 12 changed files with 434 additions and 1 deletion.
53 changes: 53 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ edition = "2018"
log = "0.4"
regex = "1"
structopt = "0.3"
crates-index = "0.15.0"
crates-index-diff = "7"
reqwest = { version = "0.10.6", features = ["blocking", "json"] } # TODO: Remove blocking when async is ready
semver = { version = "0.9", features = ["serde"] }
Expand Down
15 changes: 15 additions & 0 deletions src/bin/cratesfyi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,13 @@ enum DatabaseSubcommand {
#[structopt(subcommand)]
command: BlacklistSubcommand,
},

/// Compares the database with the index and resolves inconsistencies
Synchronize {
/// Don't actually resolve the inconsistencies, just log them
#[structopt(long)]
dry_run: bool,
},
}

impl DatabaseSubcommand {
Expand Down Expand Up @@ -480,6 +487,14 @@ impl DatabaseSubcommand {
} => db::delete_crate(&mut *ctx.conn()?, &*ctx.storage()?, &name)
.context("failed to delete the crate")?,
Self::Blacklist { command } => command.handle_args(ctx)?,

Self::Synchronize { dry_run } => {
cratesfyi::utils::consistency::run_check(
&*ctx.config()?,
&mut *ctx.conn()?,
dry_run,
)?;
}
}
Ok(())
}
Expand Down
55 changes: 55 additions & 0 deletions src/index/crates.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use crates_index::Crate;
use failure::ResultExt;
use std::io::{Seek, SeekFrom, Write};

/// Handle used to walk the crates stored in the index git repository.
pub(crate) struct Crates {
/// Repository whose `refs/remotes/origin/master` tree is read by `walk`.
repo: git2::Repository,
}

impl Crates {
    /// Wraps an already opened index repository.
    pub(super) fn new(repo: git2::Repository) -> Self {
        Self { repo }
    }

    /// Calls `f` once for every blob in the tree of `refs/remotes/origin/master` that
    /// `crates_index` is able to load as a `Crate`.
    ///
    /// Blobs that cannot be loaded as a crate are logged with a warning and skipped.
    ///
    /// # Errors
    ///
    /// Fails if the reference, commit or tree cannot be resolved, if the temporary file
    /// cannot be created, or if reading an entry / writing it to the temporary file fails.
    /// In the latter case the walk is aborted and the error carries the name of the entry
    /// that was being processed.
    pub(crate) fn walk(&self, mut f: impl FnMut(Crate)) -> Result<(), failure::Error> {
        log::debug!("Walking crates in index");
        let tree = self
            .repo
            .find_commit(self.repo.refname_to_id("refs/remotes/origin/master")?)?
            .tree()?;

        // crates_index doesn't publicly expose their slice constructor, so need to write each blob
        // to a file before loading it as a `Crate`.
        let mut tmp = tempfile::NamedTempFile::new()?;

        // Outcome of the most recent callback invocation; an `Err` here aborts the walk.
        let mut result = Ok(());

        let walked = tree.walk(git2::TreeWalkMode::PreOrder, |_, entry| {
            result = (|| {
                // Tree (directory) entries are not blobs and are simply passed over.
                if let Some(blob) = entry.to_object(&self.repo)?.as_blob() {
                    tmp.write_all(blob.content())?;
                    if let Ok(krate) = Crate::new(tmp.path()) {
                        f(krate);
                    } else {
                        // Entry names are not guaranteed to be UTF-8; never panic just to log.
                        log::warn!("Not a crate {}", entry.name().unwrap_or_default());
                    }
                    // Reset the scratch file so the next blob starts from an empty file.
                    tmp.as_file().set_len(0)?;
                    tmp.seek(SeekFrom::Start(0))?;
                }
                Result::<(), failure::Error>::Ok(())
            })()
            .with_context(|_| {
                format!(
                    "Loading crate details from {}",
                    entry.name().unwrap_or_default()
                )
            });
            match result {
                Ok(_) => git2::TreeWalkResult::Ok,
                Err(_) => git2::TreeWalkResult::Abort,
            }
        });

        // Report our own error first: when the callback aborts, the walk itself may also
        // come back as a (context-free) git error, which would otherwise hide the real cause.
        result?;
        walked?;

        Ok(())
    }
}
14 changes: 13 additions & 1 deletion src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ use std::path::{Path, PathBuf};

use url::Url;

use self::api::Api;
use self::{api::Api, crates::Crates};
use crate::error::Result;
use failure::ResultExt;

pub(crate) mod api;
mod crates;

pub struct Index {
path: PathBuf,
Expand Down Expand Up @@ -56,6 +57,17 @@ impl Index {
Ok(diff)
}

/// Brings the local index up to date and returns a [`Crates`] walker over it.
///
/// Fails if the index cannot be updated or the repository at `self.path` cannot be opened.
pub(crate) fn crates(&self) -> Result<Crates> {
    // Peeking pulls the latest changes without affecting anything else, so it is used
    // purely to make sure the index is current before it is walked.
    log::debug!("Updating index");
    {
        let diff = self.diff()?;
        diff.peek_changes()?;
    }

    // `crates_index` can't be used directly for interacting with the index because it
    // doesn't support bare repositories; only its `Crate` type is reused and the index
    // is walked by hand.
    let repo = git2::Repository::open(&self.path)?;
    Ok(Crates::new(repo))
}

/// Returns a reference to the `Api` stored in this index.
pub fn api(&self) -> &Api {
&self.api
}
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
//! [Docs.rs](https://docs.rs) (formerly cratesfyi) is an open source project to host
//! documentation of crates for the Rust Programming Language.
#![allow(clippy::cognitive_complexity)]
#![feature(type_alias_impl_trait)]

pub use self::build_queue::BuildQueue;

pub use self::config::Config;
pub use self::context::Context;
pub use self::docbuilder::options::DocBuilderOptions;
Expand Down
32 changes: 32 additions & 0 deletions src/utils/consistency/data.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use std::{collections::BTreeMap, fmt::Debug};

/// Collection of crates and their releases, as gathered from a single source.
#[derive(Default, Debug)]
pub(crate) struct Data {
/// All known crates, keyed (and therefore ordered) by their identifier.
pub(crate) crates: BTreeMap<CrateId, Crate>,
}

/// Identifier of a crate: a newtype around a `String`.
///
/// Ordering and equality are the derived ones, i.e. those of the wrapped string.
#[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Default, Debug)]
pub(crate) struct CrateId(pub(crate) String);

/// A single crate together with the releases known for it.
#[derive(Default, Debug)]
pub(crate) struct Crate {
/// Releases of this crate, keyed (and therefore ordered) by version.
pub(crate) releases: BTreeMap<Version, Release>,
}

/// Version of a release: a newtype around a `String`.
///
/// The derived ordering compares the wrapped strings, so it is plain string ordering
/// rather than semantic-version ordering.
#[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Default, Debug)]
pub(crate) struct Version(pub(crate) String);

/// A single release of a crate.
///
/// Currently carries no fields; its presence in a `Crate`'s `releases` map is the only
/// information recorded.
#[derive(Default, Debug)]
pub(crate) struct Release {}

impl std::fmt::Display for CrateId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.0, f)
}
}

impl std::fmt::Display for Version {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.0, f)
}
}
59 changes: 59 additions & 0 deletions src/utils/consistency/db.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
use super::data::{Crate, CrateId, Data, Release, Version};
use std::collections::BTreeMap;

pub(crate) fn load(conn: &mut postgres::Client) -> Result<Data, failure::Error> {
let rows = conn.query(
"
SELECT
crates.name,
releases.version
FROM crates
INNER JOIN releases ON releases.crate_id = crates.id
ORDER BY crates.id, releases.id
",
&[],
)?;

let mut data = Data {
crates: BTreeMap::new(),
};

let mut rows = rows.iter();

struct Current {
id: CrateId,
krate: Crate,
}

let mut current = if let Some(row) = rows.next() {
Current {
id: CrateId(row.get("name")),
krate: Crate {
releases: {
let mut releases = BTreeMap::new();
releases.insert(Version(row.get("version")), Release {});
releases
},
},
}
} else {
return Ok(data);
};

for row in rows {
if row.get::<_, String>("name") != current.id.0 {
data.crates.insert(
std::mem::replace(&mut current.id, CrateId(row.get("name"))),
std::mem::take(&mut current.krate),
);
}
current
.krate
.releases
.insert(Version(row.get("version")), Release::default());
}

data.crates.insert(current.id, current.krate);

Ok(data)
}
Loading

0 comments on commit 78527ca

Please sign in to comment.