Skip to content

Commit

Permalink
Upload index metadata to S3 when publishing new crates (#4661)
Browse files Browse the repository at this point in the history
Also provides a new admin tool to bulk upload existing index files.
  • Loading branch information
arlosi authored May 19, 2022
1 parent 7281ca9 commit acb38cb
Show file tree
Hide file tree
Showing 43 changed files with 3,053 additions and 37 deletions.
8 changes: 8 additions & 0 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ export TEST_DATABASE_URL=
# not needed if the S3 bucket is in US standard
# export S3_REGION=

# Credentials for uploading index metadata to S3. You can leave these commented
# out if you're not publishing index metadata to S3 from your crates.io instance.
# export S3_INDEX_BUCKET=
# export S3_INDEX_ACCESS_KEY=
# export S3_INDEX_SECRET_KEY=
# not needed if the S3 bucket is in US standard
# export S3_INDEX_REGION=

# Upstream location of the registry index. Background jobs will push to
# this URL. The default points to a local index for development.
# Run `./script/init-local-index.sh` to initialize this repo.
Expand Down
73 changes: 66 additions & 7 deletions cargo-registry-index/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,18 +293,33 @@ impl Repository {
.join(Self::relative_index_file(name))
}

/// Splits a crate name into the path segments of its index file.
///
/// Names of length 1 and 2 are placed under `1` and `2` respectively, names of
/// length 3 under `3/<first byte>`, and all other names under
/// `<bytes 0..2>/<bytes 2..4>`. The name is used exactly as given: no
/// conversion to lowercase is performed here.
///
/// # Panics
///
/// Panics if `name` is empty, or if one of the slice boundaries does not fall
/// on a UTF-8 character boundary.
fn relative_index_file_helper(name: &str) -> Vec<&str> {
    let len = name.len();
    if len == 1 {
        return vec!["1", name];
    }
    if len == 2 {
        return vec!["2", name];
    }
    if len == 3 {
        let first = &name[..1];
        return vec!["3", first, name];
    }

    // Every remaining length uses the two-level prefix layout.
    let (outer, inner) = (&name[0..2], &name[2..4]);
    vec![outer, inner, name]
}

/// Returns the relative path to the crate index file that corresponds to
/// the given crate name.
/// the given crate name as a path (i.e. with platform-dependent folder separators).
///
/// see <https://doc.rust-lang.org/cargo/reference/registries.html#index-format>
pub fn relative_index_file(name: &str) -> PathBuf {
let name = name.to_lowercase();
match name.len() {
1 => Path::new("1").join(&name),
2 => Path::new("2").join(&name),
3 => Path::new("3").join(&name[..1]).join(&name),
_ => Path::new(&name[0..2]).join(&name[2..4]).join(&name),
}
Self::relative_index_file_helper(&name).iter().collect()
}

/// Builds the location of a crate's index file for use in a URL: the crate
/// name is lowercased and its path segments are joined with `/`, independent
/// of the platform's folder separator.
///
/// see <https://doc.rust-lang.org/cargo/reference/registries.html#index-format>
pub fn relative_index_file_for_url(name: &str) -> String {
    let lowercase_name = name.to_lowercase();
    let segments = Self::relative_index_file_helper(&lowercase_name);
    segments.join("/")
}

/// Returns the [Object ID](git2::Oid) of the currently checked out commit
Expand Down Expand Up @@ -343,6 +358,50 @@ impl Repository {
self.push("refs/heads/master")
}

/// Gets a list of files that have been modified since a given `starting_commit`
/// (use `starting_commit = None` for a list of all files).
///
/// The tree of `starting_commit` is diffed against the tree of the commit that
/// `head_oid()` points at. Only deltas whose new-side file reports `exists()`
/// are returned, and the returned paths are the new-side paths of those deltas.
///
/// # Errors
///
/// Returns an error if `starting_commit` cannot be parsed into an object id,
/// if either commit or its tree cannot be found, if the diff cannot be
/// computed, or if a diff entry does not carry a path.
pub fn get_files_modified_since(
    &self,
    starting_commit: Option<&str>,
) -> anyhow::Result<Vec<PathBuf>> {
    let base_tree = match starting_commit {
        Some(sha) => {
            let oid = git2::Oid::from_str(sha).context("failed to parse commit into Oid")?;
            let commit = self
                .repository
                .find_commit(oid)
                .context("failed to find commit")?;
            Some(
                commit
                    .as_object()
                    .peel_to_tree()
                    .context("failed to find tree for commit")?,
            )
        }
        // Without a base tree the diff has no "old" side at all.
        None => None,
    };

    let head_tree = self
        .repository
        .find_commit(self.head_oid()?)
        .context("failed to find commit for HEAD")?
        .as_object()
        .peel_to_tree()
        .context("failed to find tree for HEAD")?;
    let diff = self
        .repository
        .diff_tree_to_tree(base_tree.as_ref(), Some(&head_tree), None)
        .context("failed to run diff")?;

    // Collecting into `Result` stops at the first entry without a path and
    // reports it as an error instead of panicking.
    diff.deltas()
        .map(|delta| delta.new_file())
        .filter(|file| file.exists())
        .map(|file| {
            file.path()
                .map(|path| path.to_path_buf())
                .context("diff entry is missing a path")
        })
        .collect()
}

/// Push the current branch to the provided refname
fn push(&self, refspec: &str) -> anyhow::Result<()> {
let mut ref_status = Ok(());
Expand Down
9 changes: 8 additions & 1 deletion src/admin/delete_crate.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{admin::dialoguer, db, models::Crate, schema::crates};
use crate::{admin::dialoguer, config, db, models::Crate, schema::crates};

use diesel::prelude::*;
use reqwest::blocking::Client;

#[derive(clap::Parser, Debug)]
#[clap(
Expand All @@ -25,6 +26,10 @@ pub fn run(opts: Opts) {
fn delete(opts: Opts, conn: &PgConnection) {
let krate: Crate = Crate::by_name(&opts.crate_name).first(conn).unwrap();

let config = config::Base::from_environment();
let uploader = config.uploader();
let client = Client::new();

let prompt = format!(
"Are you sure you want to delete {} ({})?",
opts.crate_name, krate.id
Expand All @@ -42,4 +47,6 @@ fn delete(opts: Opts, conn: &PgConnection) {
if !dialoguer::confirm("commit?") {
panic!("aborting transaction");
}

uploader.delete_index(&client, &krate.name).unwrap();
}
1 change: 1 addition & 0 deletions src/admin/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ pub mod populate;
pub mod render_readmes;
pub mod test_pagerduty;
pub mod transfer_crates;
pub mod upload_index;
pub mod verify_token;
60 changes: 60 additions & 0 deletions src/admin/upload_index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
use std::time::{Duration, Instant};

use crate::admin::dialoguer;
use cargo_registry_index::{Repository, RepositoryConfig};
use reqwest::blocking::Client;

use crate::config;

// Command-line options for the `upload-index` admin subcommand.
//
// NOTE(review): plain `//` comments are used for the additions in this block
// because clap's derive presumably turns `///` doc comments into help text;
// confirm before converting any of them.
#[derive(clap::Parser, Debug)]
#[clap(
name = "upload-index",
about = "Upload index from git to S3 (http-based index)"
)]
pub struct Opts {
/// Incremental commit. Any changed files made after this commit will be uploaded.
// Forwarded by `run` to `Repository::get_files_modified_since`; when it is
// `None`, that call is documented as listing all files.
incremental_commit: Option<String>,
}

/// Uploads index files from the git checkout of the index through the
/// configured uploader.
///
/// Steps, in order:
/// 1. build the uploader from the environment configuration and open the
///    index repository, calling `reset_head()` on it;
/// 2. list the files modified since `opts.incremental_commit` (all files when
///    it is `None`) and ask for interactive confirmation;
/// 3. for every listed file, take its file name as the crate name, read the
///    matching index file from disk and pass it to `upload_index`. Entries
///    whose index file does not exist on disk are reported and skipped.
///
/// Returns `Ok(())` without uploading anything when the confirmation is
/// declined.
///
/// # Errors
///
/// Returns an error if the repository cannot be opened or inspected, if a
/// listed path has no file name or the name is not valid UTF-8, if an index
/// file cannot be read, or if an upload fails.
pub fn run(opts: Opts) -> anyhow::Result<()> {
    let base_config = config::Base::from_environment();
    let uploader = base_config.uploader();
    let client = Client::new();

    println!("fetching git repo");
    let repo_config = RepositoryConfig::from_environment();
    let repo = Repository::open(&repo_config)?;
    repo.reset_head()?;
    println!("HEAD is at {}", repo.head_oid()?);

    let files = repo.get_files_modified_since(opts.incremental_commit.as_deref())?;
    println!("found {} files to upload", files.len());
    if !dialoguer::confirm("continue with upload?") {
        return Ok(());
    }

    let mut progress_update_time = Instant::now();
    for (i, file) in files.iter().enumerate() {
        // The last path component is treated as the crate name; report an
        // unusable path as an error rather than panicking mid-upload.
        let crate_name = file
            .file_name()
            .and_then(|name| name.to_str())
            .ok_or_else(|| {
                anyhow::anyhow!(
                    "cannot derive a crate name from index path `{}`",
                    file.display()
                )
            })?;
        let path = repo.index_file(crate_name);
        if !path.exists() {
            println!("skipping file `{}`", crate_name);
            continue;
        }
        let contents = std::fs::read_to_string(&path)?;
        uploader.upload_index(&client, crate_name, contents)?;

        // Print a progress update every 10 seconds. `i` is zero-based and this
        // entry has already been handled, so `i + 1` entries are done.
        let now = Instant::now();
        if now - progress_update_time > Duration::from_secs(10) {
            progress_update_time = now;
            println!("uploading {}/{}", i + 1, files.len());
        }
    }

    println!(
        "uploading completed; use `upload-index {}` for an incremental run",
        repo.head_oid()?
    );
    Ok(())
}
4 changes: 3 additions & 1 deletion src/bin/crates-admin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use cargo_registry::admin::{
delete_crate, delete_version, migrate, populate, render_readmes, test_pagerduty,
transfer_crates, verify_token,
transfer_crates, upload_index, verify_token,
};

#[derive(clap::Parser, Debug)]
Expand All @@ -22,6 +22,7 @@ enum SubCommand {
TransferCrates(transfer_crates::Opts),
VerifyToken(verify_token::Opts),
Migrate(migrate::Opts),
UploadIndex(upload_index::Opts),
}

fn main() -> anyhow::Result<()> {
Expand All @@ -38,6 +39,7 @@ fn main() -> anyhow::Result<()> {
SubCommand::TransferCrates(opts) => transfer_crates::run(opts),
SubCommand::VerifyToken(opts) => verify_token::run(opts).unwrap(),
SubCommand::Migrate(opts) => migrate::run(opts)?,
SubCommand::UploadIndex(opts) => upload_index::run(opts)?,
}

Ok(())
Expand Down
43 changes: 37 additions & 6 deletions src/config/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,24 @@ impl Base {

pub fn test() -> Self {
let uploader = Uploader::S3 {
bucket: s3::Bucket::new(
bucket: Box::new(s3::Bucket::new(
String::from("alexcrichton-test"),
None,
dotenv::var("S3_ACCESS_KEY").unwrap_or_default(),
dotenv::var("S3_SECRET_KEY").unwrap_or_default(),
// When testing we route all API traffic over HTTP so we can
// sniff/record it, but everywhere else we use https
"http",
),
)),
index_bucket: Some(Box::new(s3::Bucket::new(
String::from("alexcrichton-test"),
None,
dotenv::var("S3_INDEX_ACCESS_KEY").unwrap_or_default(),
dotenv::var("S3_INDEX_SECRET_KEY").unwrap_or_default(),
// When testing we route all API traffic over HTTP so we can
// sniff/record it, but everywhere else we use https
"http",
))),
cdn: None,
};
Self {
Expand All @@ -96,27 +105,49 @@ impl Base {
}

fn s3_panic_if_missing_keys() -> Uploader {
let index_bucket = match dotenv::var("S3_INDEX_BUCKET") {
Ok(name) => Some(Box::new(s3::Bucket::new(
name,
dotenv::var("S3_INDEX_REGION").ok(),
env("S3_INDEX_ACCESS_KEY"),
env("S3_INDEX_SECRET_KEY"),
"https",
))),
Err(_) => None,
};
Uploader::S3 {
bucket: s3::Bucket::new(
bucket: Box::new(s3::Bucket::new(
env("S3_BUCKET"),
dotenv::var("S3_REGION").ok(),
env("S3_ACCESS_KEY"),
env("S3_SECRET_KEY"),
"https",
),
)),
index_bucket,
cdn: dotenv::var("S3_CDN").ok(),
}
}

fn s3_maybe_read_only() -> Uploader {
let index_bucket = match dotenv::var("S3_INDEX_BUCKET") {
Ok(name) => Some(Box::new(s3::Bucket::new(
name,
dotenv::var("S3_INDEX_REGION").ok(),
dotenv::var("S3_INDEX_ACCESS_KEY").unwrap_or_default(),
dotenv::var("S3_INDEX_SECRET_KEY").unwrap_or_default(),
"https",
))),
Err(_) => None,
};
Uploader::S3 {
bucket: s3::Bucket::new(
bucket: Box::new(s3::Bucket::new(
env("S3_BUCKET"),
dotenv::var("S3_REGION").ok(),
dotenv::var("S3_ACCESS_KEY").unwrap_or_default(),
dotenv::var("S3_SECRET_KEY").unwrap_or_default(),
"https",
),
)),
index_bucket,
cdn: dotenv::var("S3_CDN").ok(),
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/controllers/krate/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
// Upload crate tarball
app.config
.uploader()
.upload_crate(&app, tarball, &krate, vers)?;
.upload_crate(app.http_client(), tarball, &krate, vers)?;

let (features, features2): (HashMap<_, _>, HashMap<_, _>) =
features.into_iter().partition(|(_k, vals)| {
Expand Down
67 changes: 67 additions & 0 deletions src/tests/http-data/krate_publish_features_version_2
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,72 @@
],
"body": ""
}
},
{
"request": {
"uri": "http://alexcrichton-test.s3.amazonaws.com/index/3/f/foo",
"method": "PUT",
"headers": [
[
"accept-encoding",
"gzip"
],
[
"accept",
"*/*"
],
[
"content-length",
"336"
],
[
"date",
"Fri, 15 Sep 2017 07:53:06 -0700"
],
[
"authorization",
"AWS AKIAICL5IWUZYWWKA7JA:uDc39eNdF6CcwB+q+JwKsoDLQc4="
],
[
"content-type",
"text/plain"
],
[
"host",
"alexcrichton-test.s3.amazonaws.com"
]
],
"body": "eyJuYW1lIjoiZm9vIiwidmVycyI6IjEuMC4wIiwiZGVwcyI6W3sibmFtZSI6ImJhciIsInJlcSI6Ij4gMCIsImZlYXR1cmVzIjpbXSwib3B0aW9uYWwiOmZhbHNlLCJkZWZhdWx0X2ZlYXR1cmVzIjp0cnVlLCJ0YXJnZXQiOm51bGwsImtpbmQiOiJub3JtYWwifV0sImNrc3VtIjoiYWNiNTYwNGIxMjZhYzg5NGMxZWIxMWM0NTc1YmYyMDcyZmVhNjEyMzJhODg4ZTQ1Mzc3MGM3OWQ3ZWQ1NjQxOSIsImZlYXR1cmVzIjp7Im9sZF9mZWF0IjpbXX0sImZlYXR1cmVzMiI6eyJuZXdfZmVhdCI6WyJkZXA6YmFyIiwiYmFyPy9mZWF0Il19LCJ5YW5rZWQiOmZhbHNlLCJsaW5rcyI6bnVsbCwidiI6Mn0K"
},
"response": {
"status": 200,
"headers": [
[
"x-amz-request-id",
"26589A5E52F8395C"
],
[
"x-amz-id-2",
"JdIvnNTw53aqXjBIqBLNuN4kxf/w1XWX+xuIiGBDYy7yzOSDuAMtBSrTW4ZWetcCIdqCUHuQ51A="
],
[
"content-length",
"0"
],
[
"Server",
"AmazonS3"
],
[
"date",
"Fri,15 Sep 2017 14:53:07 GMT"
],
[
"ETag",
"\"f9016ad360cebb4fe2e6e96e5949f022\""
]
],
"body": ""
}
}
]
Loading

0 comments on commit acb38cb

Please sign in to comment.