diff --git a/src/config.rs b/src/config.rs
index 9a751b982..0f11625b4 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -26,6 +26,10 @@ pub struct Config {
     #[cfg(test)]
     pub(crate) s3_bucket_is_temporary: bool,
 
+    // CloudFront domain which we can access
+    // public S3 files through
+    pub(crate) s3_static_domain: String,
+
     // Github authentication
     pub(crate) github_accesstoken: Option<String>,
     pub(crate) github_updater_min_rate_limit: u32,
@@ -67,6 +71,8 @@ pub struct Config {
     // CloudFront distribution ID for the web server.
     // Will be used for invalidation-requests.
     pub cloudfront_distribution_id_web: Option<String>,
+    /// same for the `static.docs.rs` distribution
+    pub cloudfront_distribution_id_static: Option<String>,
 
     // Build params
     pub(crate) build_attempts: u16,
@@ -125,6 +131,8 @@ impl Config {
             #[cfg(test)]
             s3_bucket_is_temporary: false,
 
+            s3_static_domain: env("S3_STATIC_DOMAIN", "https://static.docs.rs".to_string())?,
+
             github_accesstoken: maybe_env("DOCSRS_GITHUB_ACCESSTOKEN")?,
             github_updater_min_rate_limit: env("DOCSRS_GITHUB_UPDATER_MIN_RATE_LIMIT", 2500)?,
 
@@ -148,6 +156,7 @@ impl Config {
             cdn_backend: env("DOCSRS_CDN_BACKEND", CdnKind::Dummy)?,
 
             cloudfront_distribution_id_web: maybe_env("CLOUDFRONT_DISTRIBUTION_ID_WEB")?,
+            cloudfront_distribution_id_static: maybe_env("CLOUDFRONT_DISTRIBUTION_ID_STATIC")?,
 
             local_archive_cache_path: env(
                 "DOCSRS_ARCHIVE_INDEX_CACHE_PATH",
diff --git a/src/db/file.rs b/src/db/file.rs
index e124078e2..7ed3e219a 100644
--- a/src/db/file.rs
+++ b/src/db/file.rs
@@ -38,8 +38,12 @@ pub fn add_path_into_remote_archive<P: AsRef<Path>>(
     storage: &Storage,
     archive_path: &str,
     path: P,
+    public_access: bool,
 ) -> Result<(Value, CompressionAlgorithm)> {
     let (file_list, algorithm) = storage.store_all_in_archive(archive_path, path.as_ref())?;
+    if public_access {
+        storage.set_public_access(archive_path, true)?;
+    }
     Ok((
         file_list_to_json(file_list.into_iter().collect()),
         algorithm,
diff --git a/src/db/migrate.rs b/src/db/migrate.rs
index cbe4cddfc..86ed024a4 100644
--- a/src/db/migrate.rs
+++ b/src/db/migrate.rs
@@ -848,6 +848,11 @@ pub fn migrate(version: Option<Version>, conn: &mut Client) -> crate::error::Result<()> {
             "CREATE INDEX builds_release_id_idx ON builds (rid);",
             "DROP INDEX builds_release_id_idx;",
         ),
+        sql_migration!(
+            context, 35, "add public visibility to files table",
+            "ALTER TABLE files ADD COLUMN public BOOL NOT NULL DEFAULT FALSE;",
+            "ALTER TABLE files DROP COLUMN public;"
+        ),
     ];
diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs
index 97d653909..11792598a 100644
--- a/src/docbuilder/rustwide_builder.rs
+++ b/src/docbuilder/rustwide_builder.rs
@@ -410,6 +410,7 @@ impl RustwideBuilder {
                 &self.storage,
                 &rustdoc_archive_path(name, version),
                 local_storage.path(),
+                true,
             )?;
             algs.insert(new_alg);
         };
@@ -421,6 +422,7 @@ impl RustwideBuilder {
             &self.storage,
             &source_archive_path(name, version),
             build.host_source_dir(),
+            false,
        )?;
        algs.insert(new_alg);
        files_list
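The two call sites above differ only in the new flag: rustdoc archives become publicly downloadable, source archives stay private. A minimal standalone restatement of that rule (the `rustdoc/` prefix is confirmed by the tests later in this diff; the `sources/` prefix for `source_archive_path` is an assumption):

```rust
// Illustrative only: mirrors the visibility split the builder wires up above.
fn archive_is_public(archive_path: &str) -> bool {
    archive_path.starts_with("rustdoc/")
}

fn main() {
    assert!(archive_is_public("rustdoc/dummy/0.1.0.zip"));
    assert!(!archive_is_public("sources/dummy/0.1.0.zip")); // assumed prefix
}
```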
diff --git a/src/storage/database.rs b/src/storage/database.rs
index 255a06d2f..f937e3b6e 100644
--- a/src/storage/database.rs
+++ b/src/storage/database.rs
@@ -21,6 +21,32 @@ impl DatabaseBackend {
         Ok(conn.query(query, &[&path])?[0].get(0))
     }
 
+    pub(super) fn get_public_access(&self, path: &str) -> Result<bool> {
+        match self.pool.get()?.query_opt(
+            "SELECT public
+             FROM files
+             WHERE path = $1",
+            &[&path],
+        )? {
+            Some(row) => Ok(row.get(0)),
+            None => Err(super::PathNotFoundError.into()),
+        }
+    }
+
+    pub(super) fn set_public_access(&self, path: &str, public: bool) -> Result<()> {
+        if self.pool.get()?.execute(
+            "UPDATE files
+             SET public = $2
+             WHERE path = $1",
+            &[&path, &public],
+        )? == 1
+        {
+            Ok(())
+        } else {
+            Err(super::PathNotFoundError.into())
+        }
+    }
+
     pub(super) fn get(
         &self,
         path: &str,
diff --git a/src/storage/mod.rs b/src/storage/mod.rs
index 1191be6d2..6a873d753 100644
--- a/src/storage/mod.rs
+++ b/src/storage/mod.rs
@@ -140,6 +140,20 @@ impl Storage {
         }
     }
 
+    pub(crate) fn get_public_access(&self, path: &str) -> Result<bool> {
+        match &self.backend {
+            StorageBackend::Database(db) => db.get_public_access(path),
+            StorageBackend::S3(s3) => s3.get_public_access(path),
+        }
+    }
+
+    pub(crate) fn set_public_access(&self, path: &str, public: bool) -> Result<()> {
+        match &self.backend {
+            StorageBackend::Database(db) => db.set_public_access(path, public),
+            StorageBackend::S3(s3) => s3.set_public_access(path, public),
+        }
+    }
+
     fn max_file_size_for(&self, path: &str) -> usize {
         if path.ends_with(".html") {
             self.config.max_file_size_html
@@ -620,9 +634,38 @@ mod backend_tests {
         Ok(())
     }
 
+    fn test_set_public(storage: &Storage) -> Result<()> {
+        let path: &str = "foo/bar.txt";
+
+        storage.store_blobs(vec![Blob {
+            path: path.into(),
+            mime: "text/plain".into(),
+            date_updated: Utc::now(),
+            compression: None,
+            content: b"test content\n".to_vec(),
+        }])?;
+
+        assert!(!storage.get_public_access(path)?);
+        storage.set_public_access(path, true)?;
+        assert!(storage.get_public_access(path)?);
+        storage.set_public_access(path, false)?;
+        assert!(!storage.get_public_access(path)?);
+
+        for path in &["bar.txt", "baz.txt", "foo/baz.txt"] {
+            assert!(storage
+                .set_public_access(path, true)
+                .unwrap_err()
+                .downcast_ref::<PathNotFoundError>()
+                .is_some());
+        }
+
+        Ok(())
+    }
+
     fn test_get_object(storage: &Storage) -> Result<()> {
+        let path: &str = "foo/bar.txt";
         let blob = Blob {
-            path: "foo/bar.txt".into(),
+            path: path.into(),
             mime: "text/plain".into(),
             date_updated: Utc::now(),
             compression: None,
@@ -631,16 +674,25 @@ mod backend_tests {
 
         storage.store_blobs(vec![blob.clone()])?;
 
-        let found = storage.get("foo/bar.txt", std::usize::MAX)?;
+        let found = storage.get(path, std::usize::MAX)?;
         assert_eq!(blob.mime, found.mime);
         assert_eq!(blob.content, found.content);
 
+        // default visibility is private
+        assert!(!storage.get_public_access(path)?);
+
         for path in &["bar.txt", "baz.txt", "foo/baz.txt"] {
             assert!(storage
                 .get(path, std::usize::MAX)
                 .unwrap_err()
                 .downcast_ref::<PathNotFoundError>()
                 .is_some());
+
+            assert!(storage
+                .get_public_access(path)
+                .unwrap_err()
+                .downcast_ref::<PathNotFoundError>()
+                .is_some());
         }
 
         Ok(())
@@ -1028,6 +1080,7 @@ mod backend_tests {
             test_delete_prefix_without_matches,
             test_delete_percent,
             test_exists_without_remote_archive,
+            test_set_public,
         }
 
         tests_with_metrics {
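Both backends implement the same contract, which `test_set_public` above pins down: files start out private (the migration's `DEFAULT FALSE`), visibility round-trips, and reads or writes on unknown paths fail with `PathNotFoundError`. A standalone in-memory model of that contract, with illustrative names only (not the crate's real types):

```rust
use std::collections::HashMap;

#[derive(Debug, PartialEq)]
struct PathNotFoundError;

#[derive(Default)]
struct VisibilityModel(HashMap<String, bool>);

impl VisibilityModel {
    fn store(&mut self, path: &str) {
        self.0.insert(path.to_owned(), false); // private by default
    }
    fn get_public_access(&self, path: &str) -> Result<bool, PathNotFoundError> {
        self.0.get(path).copied().ok_or(PathNotFoundError)
    }
    fn set_public_access(&mut self, path: &str, public: bool) -> Result<(), PathNotFoundError> {
        self.0.get_mut(path).map(|v| *v = public).ok_or(PathNotFoundError)
    }
}

fn main() {
    let mut m = VisibilityModel::default();
    m.store("foo/bar.txt");
    assert_eq!(m.get_public_access("foo/bar.txt"), Ok(false));
    m.set_public_access("foo/bar.txt", true).unwrap();
    assert_eq!(m.get_public_access("foo/bar.txt"), Ok(true));
    assert_eq!(m.set_public_access("foo/baz.txt", true), Err(PathNotFoundError));
}
```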
diff --git a/src/storage/s3.rs b/src/storage/s3.rs
index c39d9c8ac..adf377dd0 100644
--- a/src/storage/s3.rs
+++ b/src/storage/s3.rs
@@ -3,7 +3,7 @@ use crate::{Config, Metrics};
 use anyhow::{Context, Error};
 use aws_sdk_s3::{
     error,
-    model::{Delete, ObjectIdentifier},
+    model::{Delete, ObjectIdentifier, Tag, Tagging},
     types::SdkError,
     Client, Endpoint, Region, RetryConfig,
 };
@@ -16,6 +16,9 @@ use futures_util::{
 use std::{io::Write, sync::Arc};
 use tokio::runtime::Runtime;
 
+const PUBLIC_ACCESS_TAG: &str = "static-cloudfront-access";
+const PUBLIC_ACCESS_VALUE: &str = "allow";
+
 pub(super) struct S3Backend {
     client: Client,
     runtime: Arc<Runtime>,
@@ -90,6 +93,71 @@ impl S3Backend {
         })
     }
 
+    pub(super) fn get_public_access(&self, path: &str) -> Result<bool, Error> {
+        self.runtime.block_on(async {
+            match self
+                .client
+                .get_object_tagging()
+                .bucket(&self.bucket)
+                .key(path)
+                .send()
+                .await
+            {
+                Ok(tags) => Ok(tags
+                    .tag_set()
+                    .map(|tags| {
+                        tags.iter()
+                            .filter(|tag| tag.key() == Some(PUBLIC_ACCESS_TAG))
+                            .any(|tag| tag.value() == Some(PUBLIC_ACCESS_VALUE))
+                    })
+                    .unwrap_or(false)),
+                Err(SdkError::ServiceError { err, raw }) => {
+                    if raw.http().status() == http::StatusCode::NOT_FOUND {
+                        Err(super::PathNotFoundError.into())
+                    } else {
+                        Err(err.into())
+                    }
+                }
+                Err(other) => Err(other.into()),
+            }
+        })
+    }
+
+    pub(super) fn set_public_access(&self, path: &str, public: bool) -> Result<(), Error> {
+        self.runtime.block_on(async {
+            match self
+                .client
+                .put_object_tagging()
+                .bucket(&self.bucket)
+                .key(path)
+                .tagging(if public {
+                    Tagging::builder()
+                        .tag_set(
+                            Tag::builder()
+                                .key(PUBLIC_ACCESS_TAG)
+                                .value(PUBLIC_ACCESS_VALUE)
+                                .build(),
+                        )
+                        .build()
+                } else {
+                    Tagging::builder().build()
+                })
+                .send()
+                .await
+            {
+                Ok(_) => Ok(()),
+                Err(SdkError::ServiceError { err, raw }) => {
+                    if raw.http().status() == http::StatusCode::NOT_FOUND {
+                        Err(super::PathNotFoundError.into())
+                    } else {
+                        Err(err.into())
+                    }
+                }
+                Err(other) => Err(other.into()),
+            }
+        })
+    }
+
     pub(super) fn get(
         &self,
         path: &str,
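The S3 backend stores visibility as an object tag: an object is public iff it carries `static-cloudfront-access=allow`, and `set_public_access(_, false)` clears the tag set. On the AWS side this presumably pairs with a bucket policy that gates reads through the `static.docs.rs` distribution on that tag. The policy is not part of this PR; the sketch below is an assumed example (bucket ARN and principal are placeholders), using the real `s3:ExistingObjectTag/<key>` condition key:

```rust
// Assumed bucket-policy shape, embedded as a Rust constant for illustration.
const ASSUMED_BUCKET_POLICY: &str = r#"{
  "Version": "2012-10-17",
  "Statement": [{
    "Effect": "Allow",
    "Principal": "*",
    "Action": "s3:GetObject",
    "Resource": "arn:aws:s3:::EXAMPLE-BUCKET/*",
    "Condition": {
      "StringEquals": { "s3:ExistingObjectTag/static-cloudfront-access": "allow" }
    }
  }]
}"#;

fn main() {
    println!("{}", ASSUMED_BUCKET_POLICY);
}
```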
diff --git a/src/test/fakes.rs b/src/test/fakes.rs
index 5d59a066d..05c7c595a 100644
--- a/src/test/fakes.rs
+++ b/src/test/fakes.rs
@@ -316,13 +316,21 @@ impl<'a> FakeRelease<'a> {
                 source_directory.display()
             );
             if archive_storage {
-                let archive = match kind {
-                    FileKind::Rustdoc => rustdoc_archive_path(&package.name, &package.version),
-                    FileKind::Sources => source_archive_path(&package.name, &package.version),
+                let (archive, public) = match kind {
+                    FileKind::Rustdoc => {
+                        (rustdoc_archive_path(&package.name, &package.version), true)
+                    }
+                    FileKind::Sources => {
+                        (source_archive_path(&package.name, &package.version), false)
+                    }
                 };
                 log::debug!("store in archive: {:?}", archive);
-                let (files_list, new_alg) =
-                    crate::db::add_path_into_remote_archive(&storage, &archive, source_directory)?;
+                let (files_list, new_alg) = crate::db::add_path_into_remote_archive(
+                    &storage,
+                    &archive,
+                    source_directory,
+                    public,
+                )?;
                 let mut hm = HashSet::new();
                 hm.insert(new_alg);
                 Ok((files_list, hm))
diff --git a/src/web/routes.rs b/src/web/routes.rs
index a2f0c33db..ac5df67fb 100644
--- a/src/web/routes.rs
+++ b/src/web/routes.rs
@@ -101,6 +101,10 @@ pub(super) fn build_routes() -> Routes {
         "/crate/:name/:version/builds",
         super::builds::build_list_handler,
     );
+    routes.internal_page(
+        "/crate/:name/:version/download",
+        super::rustdoc::download_handler,
+    );
     routes.static_resource(
         "/crate/:name/:version/builds.json",
         super::builds::build_list_handler,
diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs
index 70396d682..dc482ca8a 100644
--- a/src/web/rustdoc.rs
+++ b/src/web/rustdoc.rs
@@ -3,11 +3,12 @@
 use crate::{
     db::Pool,
     repositories::RepositoryStatsUpdater,
+    storage::rustdoc_archive_path,
     utils,
     web::{
         cache::CachePolicy, crate_details::CrateDetails, csp::Csp, error::Nope, file::File,
         match_version, metrics::RenderingTimesRecorder, parse_url_with_params, redirect_base,
-        MatchSemver, MetaData,
+        report_error, MatchSemver, MetaData,
     },
     Config, Metrics, Storage,
 };
@@ -718,6 +719,52 @@ pub fn badge_handler(req: &mut Request) -> IronResult<Response> {
     Ok(res)
 }
 
+pub fn download_handler(req: &mut Request) -> IronResult<Response> {
+    let router = extension!(req, Router);
+    let name = cexpect!(req, router.find("name"));
+    let req_version = cexpect!(req, router.find("version"));
+
+    let mut conn = extension!(req, Pool).get()?;
+
+    let version =
+        match match_version(&mut conn, name, Some(req_version)).and_then(|m| m.assume_exact())? {
+            MatchSemver::Exact((version, _))
+            | MatchSemver::Latest((version, _))
+            | MatchSemver::Semver((version, _)) => version,
+        };
+
+    let storage = extension!(req, Storage);
+    let config = extension!(req, Config);
+    let archive_path = rustdoc_archive_path(name, &version);
+
+    // not all archives are set for public access yet, so we check if
+    // the access is set and fix it if needed.
+
+    let archive_is_public = match storage
+        .get_public_access(&archive_path)
+        .context("reading public access for archive")
+    {
+        Ok(is_public) => is_public,
+        Err(err) => {
+            if matches!(err.downcast_ref(), Some(crate::storage::PathNotFoundError)) {
+                return Err(Nope::ResourceNotFound.into());
+            } else {
+                report_error(&err);
+                return Err(Nope::InternalServerError.into());
+            }
+        }
+    };
+
+    if !archive_is_public {
+        ctry!(req, storage.set_public_access(&archive_path, true));
+    }
+
+    Ok(super::redirect(ctry!(
+        req,
+        Url::parse(&format!("{}/{}", config.s3_static_domain, archive_path))
+    )))
+}
+
 /// Serves shared web resources used by rustdoc-generated documentation.
 ///
 /// This includes common `css` and `js` files that only change when the compiler is updated, but are
@@ -2262,4 +2309,109 @@ mod test {
             Ok(())
         })
     }
+
+    #[test]
+    fn download_unknown_version_404() {
+        wrapper(|env| {
+            let web = env.frontend();
+
+            assert_eq!(
+                web.get("/crate/dummy/0.1.0/download").send()?.status(),
+                StatusCode::NOT_FOUND
+            );
+            Ok(())
+        });
+    }
+
+    #[test]
+    fn download_old_storage_version_404() {
+        wrapper(|env| {
+            env.fake_release()
+                .name("dummy")
+                .version("0.1.0")
+                .archive_storage(false)
+                .create()?;
+
+            let web = env.frontend();
+
+            assert_eq!(
+                web.get("/crate/dummy/0.1.0/download").send()?.status(),
+                StatusCode::NOT_FOUND
+            );
+            Ok(())
+        });
+    }
+
+    #[test]
+    fn download_semver() {
+        wrapper(|env| {
+            env.fake_release()
+                .name("dummy")
+                .version("0.1.0")
+                .archive_storage(true)
+                .create()?;
+
+            let web = env.frontend();
+
+            assert_redirect_unchecked(
+                "/crate/dummy/0.1/download",
+                "https://static.docs.rs/rustdoc/dummy/0.1.0.zip",
+                web,
+            )?;
+            assert!(env.storage().get_public_access("rustdoc/dummy/0.1.0.zip")?);
+            Ok(())
+        });
+    }
+
+    #[test]
+    fn download_specific_version() {
+        wrapper(|env| {
+            env.fake_release()
+                .name("dummy")
+                .version("0.1.0")
+                .archive_storage(true)
+                .create()?;
+
+            let web = env.frontend();
+
+            // disable public access to be sure that the handler will enable it
+            env.storage()
+                .set_public_access("rustdoc/dummy/0.1.0.zip", false)?;
+
+            assert_redirect_unchecked(
+                "/crate/dummy/0.1.0/download",
+                "https://static.docs.rs/rustdoc/dummy/0.1.0.zip",
+                web,
+            )?;
+            assert!(env.storage().get_public_access("rustdoc/dummy/0.1.0.zip")?);
+            Ok(())
+        });
+    }
+
+    #[test]
+    fn download_latest_version() {
+        wrapper(|env| {
+            env.fake_release()
+                .name("dummy")
+                .version("0.1.0")
+                .archive_storage(true)
+                .create()?;
+
+            env.fake_release()
+                .name("dummy")
+                .version("0.2.0")
+                .archive_storage(true)
+                .create()?;
+
+            let web = env.frontend();
+
+            assert_redirect_unchecked(
+                "/crate/dummy/latest/download",
+                "https://static.docs.rs/rustdoc/dummy/0.2.0.zip",
+                web,
+            )?;
+            assert!(env.storage().get_public_access("rustdoc/dummy/0.2.0.zip")?);
+            Ok(())
+        });
+    }
 }
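From a client's point of view the handler answers with an HTTP redirect to the static CloudFront domain, as the tests above expect; redirecting instead of proxying keeps the ZIP bytes off the web servers. A probe sketch (assumes `reqwest` with the "blocking" feature; redirect-following is disabled so the `Location` header stays visible):

```rust
fn main() -> Result<(), reqwest::Error> {
    let client = reqwest::blocking::Client::builder()
        .redirect(reqwest::redirect::Policy::none()) // keep the redirect visible
        .build()?;
    let resp = client
        .get("https://docs.rs/crate/clap/latest/download")
        .send()?;
    // Expect a redirect pointing at the static domain, e.g.
    // https://static.docs.rs/rustdoc/clap/<version>.zip
    println!("{} -> {:?}", resp.status(), resp.headers().get(reqwest::header::LOCATION));
    Ok(())
}
```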
diff --git a/src/web/sitemap.rs b/src/web/sitemap.rs
index a34ee7a69..3ece789f3 100644
--- a/src/web/sitemap.rs
+++ b/src/web/sitemap.rs
@@ -140,7 +140,7 @@ pub fn about_handler(req: &mut Request) -> IronResult<Response> {
     let name = match *req.url.path().last().expect("iron is broken") {
         "about" | "index" => "index",
-        x @ "badges" | x @ "metadata" | x @ "redirections" => x,
+        x @ "badges" | x @ "metadata" | x @ "redirections" | x @ "download" => x,
         _ => {
             let msg = "This /about page does not exist. \
                        Perhaps you are interested in creating it?";
diff --git a/templates/about-base.html b/templates/about-base.html
index fff90016e..3cee859cd 100644
--- a/templates/about-base.html
+++ b/templates/about-base.html
@@ -26,6 +26,10 @@
 
             Docs.rs documentation
 
             {% set text = "road" | fas(fw=true) %}
             {% set text = text ~ ' Shorthand URLs' %}
             {{ macros::active_link(expected="redirections", href="/about/redirections", text=text) }}
+
+            {% set text = "download" | fas(fw=true) %}
+            {% set text = text ~ ' Download' %}
+            {{ macros::active_link(expected="download", href="/about/download", text=text) }}
diff --git a/templates/core/about/download.html b/templates/core/about/download.html
new file mode 100644
index 000000000..b40a26fa4
--- /dev/null
+++ b/templates/core/about/download.html
@@ -0,0 +1,62 @@
+{% extends "about-base.html" -%}
+
+{%- block title -%} Download {%- endblock title -%}
+
+{%- block body -%}
+    <h1>Documentation download</h1>
+
+    <div class="container about">
+        <p>
+            docs.rs stores the rustdoc output in a ZIP file.
+        </p>
+        <p>
+            These archives can be used as a base for further processing of
+            the documentation for offline readers like Dash or Zeal.
+            They are not directly usable for offline documentation.
+        </p>
+
+        <h2>URLs</h2>
+        <p>
+            The download is possible for specific or semantic versions:
+        </p>
+        <ul>
+            <li><a href="https://docs.rs/crate/clap/2.34.0/download">docs.rs/crate/clap/2.34.0/download</a></li>
+            <li><a href="https://docs.rs/crate/clap/~3/download">docs.rs/crate/clap/~3/download</a></li>
+        </ul>
+        <p>
+            But also via
+            <a href="https://docs.rs/crate/clap/latest/download">docs.rs/crate/clap/latest/download</a>
+            to get the latest version.
+        </p>
+
+        <h2>processing / caveats</h2>
+        <p>
+            To unpack the ZIP file you need any zip utility that supports
+            PKZIP version 4.6 and BZIP2 compression.
+        </p>
+        <p>
+            The archives will contain all the documentation HTML files for
+            all targets and CSS/JS assets that are specific to the build.
+            The default target will be found at the root, and other targets
+            each in its own subfolder.
+        </p>
+        <p>
+            Docs.rs is running rustdoc with <code>--static-root-path "/"</code>,
+            which leads to all references to static assets breaking if they
+            are not available under that path.
+        </p>
+        <p>
+            Since we're also adding <code>--emit=invocation-specific</code>
+            to our build, the archives will not contain any static assets
+            that are specific to the toolchain. For now these will have to be
+            downloaded file-by-file directly from docs.rs.
+        </p>
+    </div>
+{%- endblock body %}
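For the unpacking caveat described in the template above, a minimal extraction sketch: it assumes the `zip` crate (0.5.13 or later for `ZipArchive::extract`) built with its "bzip2" feature, since the archives use BZIP2 compression; file and directory names are placeholders.

```rust
use std::fs::File;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical archive fetched from /crate/clap/<version>/download.
    let file = File::open("clap-2.34.0.zip")?;
    let mut archive = zip::ZipArchive::new(file)?;
    // The default target's HTML lands at the root of the output directory,
    // other targets each in their own subfolder.
    archive.extract("clap-docs/")?;
    Ok(())
}
```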