diff --git a/Cargo.lock b/Cargo.lock index 22029affc..b8d26c34f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1456,6 +1456,7 @@ dependencies = [ "string_cache_codegen", "strum", "syntect", + "sysinfo", "tempfile", "tera", "test-case", @@ -3338,6 +3339,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "ntapi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc51db7b362b205941f71232e56c625156eb9a929f8cf74a428fd5bc094a4afc" +dependencies = [ + "winapi", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -5017,6 +5027,20 @@ dependencies = [ "yaml-rust", ] +[[package]] +name = "sysinfo" +version = "0.28.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3e847e2de7a137c8c2cede5095872dbb00f4f9bf34d061347e36b43322acd56" +dependencies = [ + "cfg-if", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "winapi", +] + [[package]] name = "tagptr" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 91ecc7bb9..c07d8adcf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,6 +71,7 @@ bzip2 = "0.4.4" serde_cbor = "0.11.1" getrandom = "0.2.1" itertools = { version = "0.10.5", optional = true} +sysinfo = { version = "0.28.2", default-features = false } rusqlite = { version = "0.29.0", features = ["bundled"] } moka = { version ="0.11.0", default-features = false, features = ["sync"]} diff --git a/docker-compose.yml b/docker-compose.yml index 36180aa3d..687d19cac 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,10 +14,16 @@ services: expose: ["3000"] volumes: - "/var/run/docker.sock:/var/run/docker.sock" - - ".rustwide-docker:/opt/docsrs/rustwide" - - "cratesio-index:/opt/docsrs/prefix/crates.io-index" + # we only need one of the two mappings here. 
+ # this will link to your local filesystem, which makes build debugging + # easier + - "./ignored/opt-docsrs:/opt/docsrs_ext/" + # this uses a docker volume and will be faster, but you need to + # log into the container to debug its contents + # - "prefix-ext:/opt/docsrs_ext/" environment: - DOCSRS_RUSTWIDE_WORKSPACE: /opt/docsrs/rustwide + DOCSRS_RUSTWIDE_WORKSPACE: /opt/docsrs_ext/rustwide + DOCSRS_PREFIX: /opt/docsrs_ext/prefix DOCSRS_DATABASE_URL: postgresql://cratesfyi:password@db DOCSRS_STORAGE_BACKEND: s3 S3_ENDPOINT: http://s3:9000 @@ -93,4 +99,4 @@ services: volumes: postgres-data: {} minio-data: {} - cratesio-index: {} + prefix-ext: {} diff --git a/dockerfiles/entrypoint.sh b/dockerfiles/entrypoint.sh index b8fdc61aa..3f19cb03d 100755 --- a/dockerfiles/entrypoint.sh +++ b/dockerfiles/entrypoint.sh @@ -2,7 +2,7 @@ set -euv -export DOCSRS_PREFIX=/opt/docsrs/prefix +export DOCSRS_PREFIX=${DOCSRS_PREFIX:-"/opt/docsrs/prefix"} export DOCSRS_DOCKER=true export DOCSRS_LOG=${DOCSRS_LOG-"docs-rs,rustwide=info"} export PATH="$PATH:/build/target/release" diff --git a/src/config.rs b/src/config.rs index 0cf907626..f880be602 100644 --- a/src/config.rs +++ b/src/config.rs @@ -93,8 +93,13 @@ pub struct Config { /// same for the `static.docs.rs` distribution pub cloudfront_distribution_id_static: Option, + // some caches do automatic cleanup based on this free disk space goal. + // Value should be between 0 and 1. 
+ pub(crate) free_disk_space_goal: f32, + // Build params pub(crate) build_attempts: u16, + pub(crate) use_build_artifact_cache: bool, pub(crate) rustwide_workspace: PathBuf, pub(crate) inside_docker: bool, pub(crate) docker_image: Option, @@ -129,7 +134,10 @@ impl Config { let prefix: PathBuf = require_env("DOCSRS_PREFIX")?; Ok(Self { + free_disk_space_goal: env("DOCSRS_FREE_DISK_SPACE_GOAL", 0.2)?, + build_attempts: env("DOCSRS_BUILD_ATTEMPTS", 5)?, + use_build_artifact_cache: env("DOCSRS_USE_BUILD_ARTIFACT_CACHE", true)?, crates_io_api_call_retries: env("DOCSRS_CRATESIO_API_CALL_RETRIES", 3)?, diff --git a/src/db/add_package.rs b/src/db/add_package.rs index 1ba775c9a..1af14827c 100644 --- a/src/db/add_package.rs +++ b/src/db/add_package.rs @@ -2,7 +2,7 @@ use crate::{ db::types::Feature, docbuilder::{BuildResult, DocCoverage}, error::Result, - index::api::{CrateData, CrateOwner, ReleaseData}, + index::api::{CrateData, GithubUser, ReleaseData}, storage::CompressionAlgorithm, utils::MetadataPackage, web::crate_details::CrateDetails, @@ -371,7 +371,7 @@ pub fn update_crate_data_in_database( /// Adds owners into database fn update_owners_in_database( conn: &mut Client, - owners: &[CrateOwner], + owners: &[GithubUser], crate_id: i32, ) -> Result<()> { // Update any existing owner data since it is mutable and could have changed since last @@ -562,9 +562,10 @@ mod test { }, )?; - let owner1 = CrateOwner { + let owner1 = GithubUser { avatar: "avatar".into(), login: "login".into(), + ..Default::default() }; update_owners_in_database(&mut conn, &[owner1.clone()], crate_id)?; @@ -600,16 +601,18 @@ mod test { // set initial owner details update_owners_in_database( &mut conn, - &[CrateOwner { + &[GithubUser { login: "login".into(), avatar: "avatar".into(), + ..Default::default() }], crate_id, )?; - let updated_owner = CrateOwner { + let updated_owner = GithubUser { login: "login".into(), avatar: "avatar2".into(), + ..Default::default() }; update_owners_in_database(&mut conn, 
&[updated_owner.clone()], crate_id)?; @@ -645,17 +648,19 @@ mod test { // set initial owner details update_owners_in_database( &mut conn, - &[CrateOwner { + &[GithubUser { login: "login".into(), avatar: "avatar".into(), + ..Default::default() }], crate_id, )?; - let new_owners: Vec = (1..5) - .map(|i| CrateOwner { + let new_owners: Vec = (1..5) + .map(|i| GithubUser { login: format!("login{i}"), avatar: format!("avatar{i}"), + ..Default::default() }) .collect(); diff --git a/src/db/delete.rs b/src/db/delete.rs index 840459dcd..d8045c159 100644 --- a/src/db/delete.rs +++ b/src/db/delete.rs @@ -187,7 +187,7 @@ fn delete_crate_from_database(conn: &mut Client, name: &str, crate_id: i32) -> R #[cfg(test)] mod tests { use super::*; - use crate::index::api::CrateOwner; + use crate::index::api::GithubUser; use crate::test::{assert_success, wrapper}; use postgres::Client; use test_case::test_case; @@ -313,9 +313,10 @@ mod tests { .name("a") .version("1.0.0") .archive_storage(archive_storage) - .add_owner(CrateOwner { + .add_owner(GithubUser { login: "malicious actor".into(), avatar: "https://example.org/malicious".into(), + ..Default::default() }) .create()?; assert!(release_exists(&mut db.conn(), v1)?); @@ -342,9 +343,10 @@ mod tests { .name("a") .version("2.0.0") .archive_storage(archive_storage) - .add_owner(CrateOwner { + .add_owner(GithubUser { login: "Peter Rabbit".into(), avatar: "https://example.org/peter".into(), + ..Default::default() }) .create()?; assert!(release_exists(&mut db.conn(), v2)?); diff --git a/src/docbuilder/caching.rs b/src/docbuilder/caching.rs new file mode 100644 index 000000000..af705fddd --- /dev/null +++ b/src/docbuilder/caching.rs @@ -0,0 +1,349 @@ +use anyhow::{bail, Context as _, Result}; +use std::{ + collections::HashMap, + fs::{self, OpenOptions}, + path::{Path, PathBuf}, + time::SystemTime, +}; +use sysinfo::{DiskExt, RefreshKind, System, SystemExt}; +use tracing::{debug, instrument, warn}; + +static LAST_ACCESSED_FILE_NAME: &str = 
"docsrs_last_accessed"; + +/// gives you the percentage of free disk space on the +/// filesystem where the given `path` lives on. +/// Return value is between 0 and 1. +fn free_disk_space_ratio>(path: P) -> Result { + let sys = System::new_with_specifics(RefreshKind::new().with_disks().with_disks_list()); + + let disk_by_mount_point: HashMap<_, _> = + sys.disks().iter().map(|d| (d.mount_point(), d)).collect(); + + let path = path.as_ref(); + + if let Some(disk) = path.ancestors().find_map(|p| disk_by_mount_point.get(p)) { + Ok((disk.available_space() as f64 / disk.total_space() as f64) as f32) + } else { + bail!("could not find mount point for path {}", path.display()); + } +} + +/// artifact caching with cleanup +#[derive(Debug)] +pub(crate) struct ArtifactCache { + cache_dir: PathBuf, +} + +impl ArtifactCache { + pub(crate) fn new(cache_dir: PathBuf) -> Result { + let cache = Self { cache_dir }; + cache.ensure_cache_exists()?; + Ok(cache) + } + + pub(crate) fn purge(&self) -> Result<()> { + fs::remove_dir_all(&self.cache_dir)?; + self.ensure_cache_exists()?; + Ok(()) + } + + fn ensure_cache_exists(&self) -> Result<()> { + if !self.cache_dir.exists() { + fs::create_dir_all(&self.cache_dir)?; + } + Ok(()) + } + + /// clean up a target directory. + /// + /// Will: + /// * delete the doc output in the root & target directories + #[instrument(skip(self))] + fn cleanup(&self, target_dir: &Path) -> Result<()> { + // proc-macro crates have a `doc` directory + // directly in the target. + let doc_dir = target_dir.join("doc"); + if doc_dir.is_dir() { + debug!(?doc_dir, "cache dir cleanup"); + fs::remove_dir_all(doc_dir)?; + } + + for item in fs::read_dir(target_dir)? 
{ + // the first level of directories are the targets in most cases, + // delete their doc-directories + let item = item?; + let doc_dir = item.path().join("doc"); + if doc_dir.is_dir() { + debug!(?doc_dir, "cache dir cleanup"); + fs::remove_dir_all(doc_dir)?; + } + } + Ok(()) + } + + fn cache_dir_for_key(&self, cache_key: &str) -> PathBuf { + self.cache_dir.join(cache_key) + } + + /// update the "last used" marker for the cache key + fn touch(&self, cache_key: &str) -> Result<()> { + let file = self + .cache_dir_for_key(cache_key) + .join(LAST_ACCESSED_FILE_NAME); + + fs::create_dir_all(file.parent().expect("we always have a parent"))?; + if file.exists() { + fs::remove_file(&file)?; + } + OpenOptions::new().create(true).write(true).open(&file)?; + Ok(()) + } + + /// return the list of cache-directories, sorted by last usage. + /// + /// The oldest / least used cache will be first. + /// To be used for cleanup. + /// + /// A missing age-marker file is interpreted as "old age". + fn all_cache_folders_by_age(&self) -> Result> { + let mut entries: Vec<(PathBuf, Option)> = fs::read_dir(&self.cache_dir)? + .filter_map(Result::ok) + .filter_map(|entry| { + let path = entry.path(); + path.is_dir().then(|| { + let last_accessed = path + .join(LAST_ACCESSED_FILE_NAME) + .metadata() + .and_then(|metadata| metadata.modified()) + .ok(); + (path, last_accessed) + }) + }) + .collect(); + + // `None` will appear first after sorting + entries.sort_by_key(|(_, time)| *time); + + Ok(entries.into_iter().map(|(path, _)| path).collect()) + } + + /// free up disk space by deleting the oldest cache folders. + /// + /// Deletes cache folders until the `free_percent_goal` is reached. + pub(crate) fn clear_disk_space(&self, free_percent_goal: f32) -> Result<()> { + let space_ok = + || -> Result { Ok(free_disk_space_ratio(&self.cache_dir)? >= free_percent_goal) }; + + if space_ok()? { + return Ok(()); + } + + for folder in self.all_cache_folders_by_age()? 
{ + warn!( + ?folder, + "freeing up disk space by deleting oldest cache folder" + ); + fs::remove_dir_all(&folder)?; + + if space_ok()? { + break; + } + } + + Ok(()) + } + + /// restore a cached target directory. + /// + /// Will just move the cache folder into the rustwide + /// target path. If that fails, will use `copy_dir_all`. + #[instrument(skip(self))] + pub(crate) fn restore_to + std::fmt::Debug>( + &self, + cache_key: &str, + target_dir: P, + ) -> Result<()> { + let target_dir = target_dir.as_ref(); + if target_dir.exists() { + // Delete the target dir to be safe, even though most of the + // time the dir doesn't exist or is empty. + fs::remove_dir_all(target_dir).context("could not clean target directory")?; + } + + let cache_dir = self.cache_dir_for_key(cache_key); + if !cache_dir.exists() { + // when there is no existing cache dir, + // we can just create an empty target. + fs::create_dir_all(target_dir).context("could not create target directory")?; + return Ok(()); + } + + fs::create_dir_all(target_dir.parent().unwrap())?; + fs::rename(cache_dir, target_dir).context("could not move cache directory to target")?; + self.cleanup(target_dir)?; + Ok(()) + } + + #[instrument(skip(self))] + pub(crate) fn save + std::fmt::Debug>( + &self, + cache_key: &str, + target_dir: P, + ) -> Result<()> { + let cache_dir = self.cache_dir_for_key(cache_key); + if cache_dir.exists() { + fs::remove_dir_all(&cache_dir)?; + } + self.ensure_cache_exists()?; + + debug!(?target_dir, ?cache_dir, "saving artifact cache"); + fs::rename(&target_dir, &cache_dir).context("could not move target directory to cache")?; + self.cleanup(&cache_dir)?; + self.touch(cache_key)?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_creates_cache_dir() { + let parent = tempfile::tempdir().unwrap(); + + let cache_dir = parent.path().join("cache"); + assert!(!cache_dir.exists()); + ArtifactCache::new(cache_dir.clone()).unwrap(); + assert!(cache_dir.exists()); + } + + 
#[test] + fn test_purge() { + let cache_dir = tempfile::tempdir().unwrap(); + let cache_path = cache_dir.path(); + let cache = ArtifactCache::new(cache_path.to_owned()).unwrap(); + + // put something into the cache + for key in &["1", "2", "3"] { + cache.touch(key).unwrap(); + } + + assert_eq!(fs::read_dir(cache_path).unwrap().count(), 3); + + cache.purge().unwrap(); + assert_eq!(fs::read_dir(cache_path).unwrap().count(), 0); + assert!(cache_path.exists()); + } + + #[test] + fn test_save_restore() { + let cache_dir = tempfile::tempdir().unwrap(); + let cache_path = cache_dir.path(); + let cache = ArtifactCache::new(cache_path.to_owned()).unwrap(); + + // create a dummy target directory + let target_dir = tempfile::tempdir().unwrap(); + let target_path = target_dir.path(); + + let linux_gnu = target_path.join("linux-gnu"); + fs::create_dir_all(&linux_gnu).unwrap(); + + // docs will be deleted while saving + let docs = target_path.join("doc"); + fs::create_dir(&docs).unwrap(); + fs::write(docs.join("1.html"), b"content").unwrap(); + let target_docs = linux_gnu.join("doc"); + fs::create_dir(&target_docs).unwrap(); + fs::write(target_docs.join("2.html"), b"content").unwrap(); + + // other files in the directory will be kept + fs::write(target_path.join("file1"), b"content").unwrap(); + fs::write(linux_gnu.join("file2"), b"content").unwrap(); + + cache.save("cache_key", target_path).unwrap(); + + // target is gone + assert!(!target_path.exists()); + + let saved_cache = cache_path.join("cache_key"); + // doc output is gone + assert!(!saved_cache.join("doc").exists()); + assert!(!saved_cache.join("linux-gnu").join("doc").exists()); + // the rest is there + assert!(saved_cache.join("file1").exists()); + assert!(saved_cache.join("linux-gnu").join("file2").exists()); + + cache.restore_to("cache_key", target_path).unwrap(); + // target is back + assert!(target_path.exists()); + assert!(target_path.join("file1").exists()); + 
assert!(target_path.join("linux-gnu").join("file2").exists()); + // cache is gone + assert!(!saved_cache.exists()); + } + + #[test] + fn test_target_doesnt_exist() { + let cache_dir = tempfile::tempdir().unwrap(); + let cache = ArtifactCache::new(cache_dir.path().to_owned()).unwrap(); + + let parent = tempfile::tempdir().unwrap(); + + let potential_target = parent.path().join("target"); + cache.restore_to("cache_key", &potential_target).unwrap(); + + assert!(potential_target.exists()); + } + + #[test] + fn test_source_doesnt_exist_errors() { + let cache_dir = tempfile::tempdir().unwrap(); + let cache = ArtifactCache::new(cache_dir.path().to_owned()).unwrap(); + + let parent = tempfile::tempdir().unwrap(); + + let non_existing_source = parent.path().join("source"); + assert!(cache.save("cache_key", non_existing_source).is_err()); + } + + #[test] + fn test_clean_disk_space() { + let cache_dir = tempfile::tempdir().unwrap(); + let cache_path = cache_dir.path(); + let cache = ArtifactCache::new(cache_path.to_owned()).unwrap(); + + // put something into the cache + for key in &["1", "2", "3"] { + cache.touch(key).unwrap(); + } + + assert_eq!(fs::read_dir(cache_path).unwrap().count(), 3); + + // will clean everything + cache.clear_disk_space(100.0).unwrap(); + + assert_eq!(fs::read_dir(cache_path).unwrap().count(), 0); + assert!(cache_path.exists()); + } + + #[test] + fn test_dont_clean_disk_space() { + let cache_dir = tempfile::tempdir().unwrap(); + let cache_path = cache_dir.path(); + let cache = ArtifactCache::new(cache_path.to_owned()).unwrap(); + + // put something into the cache + for key in &["1", "2", "3"] { + cache.touch(key).unwrap(); + } + + assert_eq!(fs::read_dir(cache_path).unwrap().count(), 3); + + // will clean nothing + cache.clear_disk_space(0.0).unwrap(); + + assert_eq!(fs::read_dir(cache_path).unwrap().count(), 3); + assert!(cache_path.exists()); + } +} diff --git a/src/docbuilder/mod.rs b/src/docbuilder/mod.rs index 6eadd9295..228c626fd 100644 --- 
a/src/docbuilder/mod.rs +++ b/src/docbuilder/mod.rs @@ -1,3 +1,4 @@ +mod caching; mod crates; mod limits; mod rustwide_builder; diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 8df0e3876..eaaf0b930 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -1,3 +1,4 @@ +use super::caching::ArtifactCache; use crate::db::file::add_path_into_database; use crate::db::{ add_build_into_database, add_doc_coverage, add_package_into_database, @@ -5,6 +6,7 @@ use crate::db::{ }; use crate::docbuilder::{crates::crates_from_path, Limits}; use crate::error::Result; +use crate::index::api::ReleaseData; use crate::repositories::RepositoryStatsUpdater; use crate::storage::{rustdoc_archive_path, source_archive_path}; use crate::utils::{ @@ -26,13 +28,14 @@ use rustwide::{AlternativeRegistry, Build, Crate, Toolchain, Workspace, Workspac use std::collections::{HashMap, HashSet}; use std::path::Path; use std::sync::Arc; -use tracing::{debug, info, warn}; +use tracing::{debug, info, instrument, warn}; const USER_AGENT: &str = "docs.rs builder (https://github.com/rust-lang/docs.rs)"; const COMPONENTS: &[&str] = &["llvm-tools-preview", "rustc-dev", "rustfmt"]; const DUMMY_CRATE_NAME: &str = "empty-library"; const DUMMY_CRATE_VERSION: &str = "1.0.0"; +#[derive(Debug)] pub enum PackageKind<'a> { Local(&'a Path), CratesIo, @@ -47,6 +50,7 @@ pub struct RustwideBuilder { storage: Arc, metrics: Arc, index: Arc, + artifact_cache: Option, rustc_version: String, repository_stats_updater: Arc, skip_build_if_exists: bool, @@ -66,8 +70,18 @@ impl RustwideBuilder { }; builder = builder.sandbox_image(image); } + + let artifact_cache = if config.use_build_artifact_cache { + Some(ArtifactCache::new(config.prefix.join("artifact_cache"))?) 
+ } else { + None + }; + if cfg!(test) { builder = builder.fast_init(true); + if let Some(ref artifact_cache) = artifact_cache { + artifact_cache.purge()?; + } } let workspace = builder.init().map_err(FailureError::compat)?; @@ -89,6 +103,7 @@ impl RustwideBuilder { Ok(RustwideBuilder { workspace, toolchain, + artifact_cache, config, db: context.pool()?, storage: context.storage()?, @@ -199,6 +214,9 @@ impl RustwideBuilder { let has_changed = old_version.as_deref() != Some(&self.rustc_version); if has_changed { + if let Some(ref artifact_cache) = self.artifact_cache { + artifact_cache.purge()?; + } self.add_essential_files()?; } Ok(has_changed) @@ -321,6 +339,7 @@ impl RustwideBuilder { self.build_package(&package.name, &package.version, PackageKind::Local(path)) } + #[instrument(skip(self))] pub fn build_package( &mut self, name: &str, @@ -393,6 +412,35 @@ impl RustwideBuilder { // Fetch this before we enter the sandbox, so networking isn't blocked. build.fetch_build_std_dependencies(&targets)?; + let release_data = match self + .index + .api() + .get_release_data(name, version) + .context("error fetching release data from crates.io") + { + Ok(data) => data, + Err(err) => { + warn!("{:#?}", err); + ReleaseData::default() + } + }; + + if let (Some(ref artifact_cache), Some(ref published_by)) = + (&self.artifact_cache, release_data.published_by) + { + info!( + host_target_dir=?build.host_target_dir(), + published_by_id=published_by.id, + published_by_login=published_by.login, + "restoring artifact cache", + ); + if let Err(err) = artifact_cache + .restore_to(&published_by.id.to_string(), build.host_target_dir()) + { + warn!(?err, "could not restore artifact cache"); + } + } + (|| -> Result { let mut has_docs = false; let mut successful_targets = Vec::new(); @@ -547,6 +595,30 @@ impl RustwideBuilder { } } + if let (Some(artifact_cache), Some(ref published_by)) = + (&self.artifact_cache, release_data.published_by) + { + info!( + 
host_target_dir=?build.host_target_dir(), + published_by_id=published_by.id, + published_by_login=published_by.login, + "saving artifact cache", + ); + if let Err(err) = artifact_cache + .save(&published_by.id.to_string(), build.host_target_dir()) + .context("error saving artifact cache") + { + warn!(?err, "could not save artifact cache"); + }; + + if let Err(err) = artifact_cache + .clear_disk_space(self.config.free_disk_space_goal) + .context("error freeing disk space on artifact cache") + { + warn!(?err, "could not clear disk space"); + } + } + if res.result.successful { // delete eventually existing files from pre-archive storage. // we're doing this in the end so eventual problems in the build @@ -710,7 +782,12 @@ impl RustwideBuilder { let old_dir = target_dir.join("doc"); let new_dir = target_dir.join(target).join("doc"); debug!("rename {} to {}", old_dir.display(), new_dir.display()); - std::fs::create_dir(target_dir.join(target))?; + { + let parent = new_dir.parent().unwrap(); + if !parent.exists() { + std::fs::create_dir(target_dir.join(target))?; + } + } std::fs::rename(old_dir, new_dir)?; } @@ -888,6 +965,8 @@ mod tests { use crate::test::{assert_redirect, assert_success, wrapper}; use serde_json::Value; + const DUMMY_CRATE_PUBLISHER_ID: &str = "4825"; + #[test] #[ignore] fn test_build_crate() { @@ -1069,6 +1148,42 @@ mod tests { }) } + #[test] + #[ignore] + fn test_artifact_cache() { + wrapper(|env| { + let crate_ = DUMMY_CRATE_NAME; + let version = DUMMY_CRATE_VERSION; + let expected_cache_dir = env + .config() + .prefix + .join("artifact_cache") + .join(DUMMY_CRATE_PUBLISHER_ID); + + let mut builder = RustwideBuilder::init(env).unwrap(); + + // first build creates the cache + assert!(!expected_cache_dir.exists()); + assert!(builder.build_package(crate_, version, PackageKind::CratesIo)?); + assert!(expected_cache_dir.exists()); + + // cache dir doesn't contain doc output + assert!(!expected_cache_dir.join("doc").exists()); + + // but seems to be a 
normal cargo target directory, + // which also means that `build_package` actually used the + // target directory, and it was moved into the cache afterwards. + for expected_file in &["docsrs_last_accessed", "debug", ".rustc_info.json"] { + assert!(expected_cache_dir.join(expected_file).exists()); + } + + // do a second build, should not fail + assert!(builder.build_package(crate_, version, PackageKind::CratesIo)?); + + Ok(()) + }); + } + #[test] #[ignore] fn test_proc_macro() { diff --git a/src/index/api.rs b/src/index/api.rs index 62e76f280..df682c297 100644 --- a/src/index/api.rs +++ b/src/index/api.rs @@ -12,7 +12,6 @@ const APP_USER_AGENT: &str = concat!( include_str!(concat!(env!("OUT_DIR"), "/git_version")) ); -#[derive(Debug)] pub struct Api { api_base: Option, max_retries: u32, @@ -21,14 +20,15 @@ pub struct Api { #[derive(Debug)] pub struct CrateData { - pub(crate) owners: Vec, + pub(crate) owners: Vec, } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub(crate) struct ReleaseData { pub(crate) release_time: DateTime, pub(crate) yanked: bool, pub(crate) downloads: i32, + pub(crate) published_by: Option, } impl Default for ReleaseData { @@ -37,13 +37,31 @@ impl Default for ReleaseData { release_time: Utc::now(), yanked: false, downloads: 0, + published_by: None, } } } -#[derive(Debug, Clone)] -pub struct CrateOwner { +#[derive(Deserialize)] +struct VersionData { + num: Version, + #[serde(default = "Utc::now")] + created_at: DateTime, + #[serde(default)] + yanked: bool, + #[serde(default)] + downloads: i32, + #[serde(default)] + published_by: Option, +} + +#[derive(Debug, Clone, PartialEq, Deserialize, Default)] +pub struct GithubUser { + #[serde(default)] + pub(crate) id: u64, + #[serde(default)] pub(crate) avatar: String, + #[serde(default)] pub(crate) login: String, } @@ -82,23 +100,24 @@ impl Api { } pub(crate) fn get_release_data(&self, name: &str, version: &str) -> Result { - let (release_time, yanked, downloads) = self - 
.get_release_time_yanked_downloads(name, version) - .context(format!("Failed to get crate data for {name}-{version}"))?; + let version = Version::parse(version)?; + let data = self + .get_versions(name) + .context(format!("Failed to get crate data for {name}-{version}"))? + .into_iter() + .find(|data| data.num == version) + .with_context(|| anyhow!("Could not find version in response"))?; Ok(ReleaseData { - release_time, - yanked, - downloads, + release_time: data.created_at, + yanked: data.yanked, + downloads: data.downloads, + published_by: data.published_by, }) } /// Get release_time, yanked and downloads from the registry's API - fn get_release_time_yanked_downloads( - &self, - name: &str, - version: &str, - ) -> Result<(DateTime, bool, i32)> { + fn get_versions(&self, name: &str) -> Result> { let url = { let mut url = self.api_base()?; url.path_segments_mut() @@ -112,35 +131,17 @@ impl Api { versions: Vec, } - #[derive(Deserialize)] - struct VersionData { - num: Version, - #[serde(default = "Utc::now")] - created_at: DateTime, - #[serde(default)] - yanked: bool, - #[serde(default)] - downloads: i32, - } - let response: Response = retry( || Ok(self.client.get(url.clone()).send()?.error_for_status()?), self.max_retries, )? 
.json()?; - let version = Version::parse(version)?; - let version = response - .versions - .into_iter() - .find(|data| data.num == version) - .with_context(|| anyhow!("Could not find version in response"))?; - - Ok((version.created_at, version.yanked, version.downloads)) + Ok(response.versions) } /// Fetch owners from the registry's API - fn get_owners(&self, name: &str) -> Result> { + fn get_owners(&self, name: &str) -> Result> { let url = { let mut url = self.api_base()?; url.path_segments_mut() @@ -151,15 +152,7 @@ impl Api { #[derive(Deserialize)] struct Response { - users: Vec, - } - - #[derive(Deserialize)] - struct OwnerData { - #[serde(default)] - avatar: Option, - #[serde(default)] - login: Option, + users: Vec, } let response: Response = retry( @@ -168,22 +161,150 @@ impl Api { )? .json()?; - let result = response + Ok(response .users .into_iter() - .filter(|data| { - !data - .login - .as_ref() - .map(|login| login.is_empty()) - .unwrap_or_default() - }) - .map(|data| CrateOwner { - avatar: data.avatar.unwrap_or_default(), - login: data.login.unwrap_or_default(), - }) - .collect(); - - Ok(result) + .filter(|data| !data.login.is_empty()) + .collect()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use mockito::mock; + use serde_json::{self, json}; + + fn api() -> Api { + Api::new(Some(Url::parse(&mockito::server_url()).unwrap()), 1).unwrap() + } + + #[test] + fn get_owners() { + let _m = mock("GET", "/api/v1/crates/krate/owners") + .with_header("content-type", "application/json") + .with_body( + serde_json::to_string(&json!({ + "users": [ + { + "id": 1, + "login": "the_first_owner", + "name": "name", + "avatar": "http://something", + "kind": "user", + "url": "https://github.com/the_second_owner" + }, + { + "id": 2, + "login": "the_second_owner", + "name": "another name", + "avatar": "http://anotherthing", + "kind": "user", + "url": "https://github.com/the_second_owner" + } + ]})) + .unwrap(), + ) + .create(); + + assert_eq!( + 
api().get_owners("krate").unwrap(), + vec![ + GithubUser { + id: 1, + avatar: "http://something".into(), + login: "the_first_owner".into() + }, + GithubUser { + id: 2, + avatar: "http://anotherthing".into(), + login: "the_second_owner".into() + } + ] + ); + } + + #[test] + fn get_release_info() { + let created = Utc::now(); + let _m = mock("GET", "/api/v1/crates/krate/versions") + .with_header("content-type", "application/json") + .with_body( + serde_json::to_string(&json!({ + "versions": [ + { + "num": "1.2.3", + "created_at": created.to_rfc3339(), + "yanked": true, + "downloads": 223, + "license": "MIT", + "published_by": { + "id": 2, + "login": "the_second_owner", + "name": "another name", + "avatar": "http://anotherthing" + } + }, + { + "num": "2.2.3", + "created_at": Utc::now().to_rfc3339(), + "yanked": false, + "downloads": 333, + "license": "MIT", + "published_by": { + "id": 1, + "login": "owner", + "name": "name", + } + } + ]})) + .unwrap(), + ) + .create(); + + assert_eq!( + api().get_release_data("krate", "1.2.3").unwrap(), + ReleaseData { + release_time: created, + yanked: true, + downloads: 223, + published_by: Some(GithubUser { + id: 2, + avatar: "http://anotherthing".into(), + login: "the_second_owner".into(), + }) + } + ); + } + + #[test] + fn get_release_info_without_publisher() { + let created = Utc::now(); + let _m = mock("GET", "/api/v1/crates/krate/versions") + .with_header("content-type", "application/json") + .with_body( + serde_json::to_string(&json!({ + "versions": [ + { + "num": "1.2.3", + "created_at": created.to_rfc3339(), + "yanked": true, + "downloads": 223, + "published_by": null + }, + ]})) + .unwrap(), + ) + .create(); + + assert_eq!( + api().get_release_data("krate", "1.2.3").unwrap(), + ReleaseData { + release_time: created, + yanked: true, + downloads: 223, + published_by: None, + } + ); } } diff --git a/src/test/fakes.rs b/src/test/fakes.rs index 5d2583972..4da8ff442 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -2,7 
+2,7 @@ use super::TestDatabase; use crate::docbuilder::{BuildResult, DocCoverage}; use crate::error::Result; -use crate::index::api::{CrateData, CrateOwner, ReleaseData}; +use crate::index::api::{CrateData, GithubUser, ReleaseData}; use crate::storage::{rustdoc_archive_path, source_archive_path, Storage}; use crate::utils::{Dependency, MetadataPackage, Target}; use anyhow::Context; @@ -85,11 +85,7 @@ impl<'a> FakeRelease<'a> { doc_targets: Vec::new(), default_target: None, registry_crate_data: CrateData { owners: Vec::new() }, - registry_release_data: ReleaseData { - release_time: Utc::now(), - yanked: false, - downloads: 0, - }, + registry_release_data: ReleaseData::default(), has_docs: true, has_examples: false, readme: None, @@ -214,7 +210,7 @@ impl<'a> FakeRelease<'a> { self.source_file("README.md", content.as_bytes()) } - pub(crate) fn add_owner(mut self, owner: CrateOwner) -> Self { + pub(crate) fn add_owner(mut self, owner: GithubUser) -> Self { self.registry_crate_data.owners.push(owner); self } diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs index 5a3dbe367..ed2b1ddcc 100644 --- a/src/web/crate_details.rs +++ b/src/web/crate_details.rs @@ -423,7 +423,7 @@ pub(crate) async fn get_all_releases( #[cfg(test)] mod tests { use super::*; - use crate::index::api::CrateOwner; + use crate::index::api::GithubUser; use crate::test::{ assert_cache_control, assert_redirect, assert_redirect_cached, wrapper, TestDatabase, }; @@ -820,9 +820,10 @@ mod tests { env.fake_release() .name("foo") .version("0.0.1") - .add_owner(CrateOwner { + .add_owner(GithubUser { login: "foobar".into(), avatar: "https://example.org/foobar".into(), + ..Default::default() }) .create()?; @@ -838,13 +839,15 @@ mod tests { env.fake_release() .name("foo") .version("0.0.2") - .add_owner(CrateOwner { + .add_owner(GithubUser { login: "foobar".into(), avatar: "https://example.org/foobarv2".into(), + ..Default::default() }) - .add_owner(CrateOwner { + .add_owner(GithubUser { login: 
"barfoo".into(), avatar: "https://example.org/barfoo".into(), + ..Default::default() }) .create()?; @@ -865,9 +868,10 @@ mod tests { env.fake_release() .name("foo") .version("0.0.3") - .add_owner(CrateOwner { + .add_owner(GithubUser { login: "barfoo".into(), avatar: "https://example.org/barfoo".into(), + ..Default::default() }) .create()?; @@ -883,9 +887,10 @@ mod tests { env.fake_release() .name("bar") .version("0.0.1") - .add_owner(CrateOwner { + .add_owner(GithubUser { login: "barfoo".into(), avatar: "https://example.org/barfoov2".into(), + ..Default::default() }) .create()?; diff --git a/src/web/releases.rs b/src/web/releases.rs index f97f90556..ba0a5250b 100644 --- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -749,7 +749,7 @@ pub(crate) async fn build_queue_handler( #[cfg(test)] mod tests { use super::*; - use crate::index::api::CrateOwner; + use crate::index::api::GithubUser; use crate::test::{ assert_redirect, assert_redirect_unchecked, assert_success, wrapper, TestFrontend, }; @@ -1473,9 +1473,10 @@ mod tests { let web = env.frontend(); env.fake_release() .name("some_random_crate") - .add_owner(CrateOwner { + .add_owner(GithubUser { login: "foobar".into(), avatar: "https://example.org/foobar".into(), + ..Default::default() }) .create()?;