From 1da9a4344032201aaeacf8583709385cfed1f81b Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Sun, 4 Dec 2022 22:18:13 -0800 Subject: [PATCH] Redirect old source page URLs to sourcegraph --- src/storage/mod.rs | 26 --- src/test/fakes.rs | 5 - src/web/builds.rs | 2 +- src/web/crate_details.rs | 2 +- src/web/features.rs | 2 +- src/web/mod.rs | 45 ---- src/web/source.rs | 465 ++------------------------------------- 7 files changed, 16 insertions(+), 531 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 3fd4a7f26..f8ba0b943 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -40,12 +40,6 @@ pub(crate) struct Blob { pub(crate) compression: Option, } -impl Blob { - pub(crate) fn is_empty(&self) -> bool { - self.mime == "application/x-empty" - } -} - fn get_file_list_from_dir>(path: P, files: &mut Vec) -> Result<()> { let path = path.as_ref(); @@ -185,26 +179,6 @@ impl Storage { }) } - pub(crate) fn fetch_source_file( - &self, - name: &str, - version: &str, - path: &str, - archive_storage: bool, - ) -> Result { - Ok(if archive_storage { - self.get_from_archive( - &source_archive_path(name, version), - path, - self.max_file_size_for(path), - None, - )? - } else { - let remote_path = format!("sources/{}/{}/{}", name, version, path); - self.get(&remote_path, self.max_file_size_for(path))? - }) - } - pub(crate) fn rustdoc_file_exists( &self, name: &str, diff --git a/src/test/fakes.rs b/src/test/fakes.rs index fb3b98683..6c3af61bd 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -99,11 +99,6 @@ impl<'a> FakeRelease<'a> { } } - pub(crate) fn description(mut self, new: impl Into) -> Self { - self.package.description = Some(new.into()); - self - } - pub(crate) fn release_time(mut self, new: DateTime) -> Self { self.registry_release_data.release_time = new; self diff --git a/src/web/builds.rs b/src/web/builds.rs index ed926c247..fb08f6e66 100644 --- a/src/web/builds.rs +++ b/src/web/builds.rs @@ -333,7 +333,7 @@ mod tests { let body = String::from_utf8(resp.bytes().unwrap().to_vec()).unwrap(); assert!(body.contains(" File { - let (name, mime) = if let Some((dir, _)) = path.split_once('/') { - (dir, "dir") - } else { - (path, mime) - }; - - Self { - name: name.to_owned(), - mime: mime.to_owned(), - } - } -} - -/// A list of source files -#[derive(Debug, Clone, PartialEq, Serialize)] -struct FileList { - metadata: MetaData, - files: Vec, -} - -impl FileList { - /// Gets FileList from a request path - /// - /// All paths stored in database have this format: - /// - /// ```text - /// [ - /// ["text/plain", ".gitignore"], - /// ["text/x-c", "src/reseeding.rs"], - /// ["text/x-c", "src/lib.rs"], - /// ["text/x-c", "README.md"], - /// ... - /// ] - /// ``` - /// - /// This function is only returning FileList for requested directory. If is empty, - /// it will return list of files (and dirs) for root directory. req_path must be a - /// directory or empty for root directory. - fn from_path( - conn: &mut Client, - name: &str, - version: &str, - version_or_latest: &str, - req_path: &str, - ) -> Option { - let rows = conn - .query( - "SELECT crates.name, - releases.version, - releases.description, - releases.target_name, - releases.rustdoc_status, - releases.files, - releases.default_target, - releases.doc_targets, - releases.yanked, - releases.doc_rustc_version - FROM releases - LEFT OUTER JOIN crates ON crates.id = releases.crate_id - WHERE crates.name = $1 AND releases.version = $2", - &[&name, &version], - ) - .unwrap(); - - if rows.is_empty() { - return None; - } - - let files: Value = rows[0].try_get(5).ok()?; - - let mut file_list = Vec::new(); - if let Some(files) = files.as_array() { - file_list.reserve(files.len()); - - for file in files { - if let Some(file) = file.as_array() { - let mime = file[0].as_str().unwrap(); - let path = file[1].as_str().unwrap(); - - // skip .cargo-ok generated by cargo - if path == ".cargo-ok" { - continue; - } - - // look only files for req_path - if let Some(path) = path.strip_prefix(req_path) { - let file = File::from_path_and_mime(path, mime); - - // avoid adding duplicates, a directory may occur more than once - if !file_list.contains(&file) { - file_list.push(file); - } - } - } - } - - if file_list.is_empty() { - return None; - } - - file_list.sort_by(|a, b| { - // directories must be listed first - if a.mime == "dir" && b.mime != "dir" { - Ordering::Less - } else if a.mime != "dir" && b.mime == "dir" { - Ordering::Greater - } else { - a.name.to_lowercase().cmp(&b.name.to_lowercase()) - } - }); - - Some(FileList { - metadata: MetaData { - name: rows[0].get(0), - version: rows[0].get(1), - version_or_latest: version_or_latest.to_string(), - description: rows[0].get(2), - target_name: rows[0].get(3), - rustdoc_status: rows[0].get(4), - default_target: rows[0].get(6), - doc_targets: MetaData::parse_doc_targets(rows[0].get(7)), - yanked: rows[0].get(8), - rustdoc_css_file: get_correct_docsrs_style_file(rows[0].get(9)).unwrap(), - }, - files: file_list, - }) - } else { - None - } - } -} - -#[derive(Debug, Clone, PartialEq, Serialize)] -struct SourcePage { - file_list: FileList, - show_parent_link: bool, - file: Option, - file_content: Option, - canonical_url: String, -} - -impl_webpage! { - SourcePage = "crate/source.html", - canonical_url = |page| Some(page.canonical_url.clone()), -} pub fn source_browser_handler(req: &mut Request) -> IronResult { let router = extension!(req, Router); @@ -191,9 +28,10 @@ pub fn source_browser_handler(req: &mut Request) -> IronResult { // use that instead crate_name = new_name; } - let (version, version_or_latest, is_latest_url) = match v.version { - MatchSemver::Latest((version, _)) => (version, "latest".to_string(), true), - MatchSemver::Exact((version, _)) => (version.clone(), version, false), + + let version = match v.version { + MatchSemver::Latest((version, _)) => version, + MatchSemver::Exact((version, _)) => version.clone(), MatchSemver::Semver((version, _)) => { let url = ctry!( req, @@ -210,114 +48,19 @@ pub fn source_browser_handler(req: &mut Request) -> IronResult { } }; - // get path (req_path) for FileList::from_path and actual path for super::file::File::from_path - let (req_path, file_path) = { + let file_path = { let mut req_path = req.url.path(); // remove first elements from path which is /crate/:name/:version/source for _ in 0..4 { req_path.remove(0); } - let file_path = req_path.join("/"); - - // FileList::from_path is only working for directories - // remove file name if it's not a directory - if let Some(last) = req_path.last_mut() { - if !last.is_empty() { - *last = ""; - } - } - - // remove crate name and version from req_path - let path = req_path - .join("/") - .replace(&format!("{}/{}/", crate_name, version), ""); - - (path, file_path) - }; - - let canonical_url = format!( - "https://docs.rs/crate/{}/latest/source/{}", - crate_name, file_path - ); - - let storage = extension!(req, Storage); - let archive_storage: bool = { - let rows = ctry!( - req, - conn.query( - " - SELECT archive_storage - FROM releases - INNER JOIN crates ON releases.crate_id = crates.id - WHERE - name = $1 AND - version = $2 - ", - &[&crate_name, &version] - ) - ); - // this unwrap is safe because `match_version` guarantees that the `crate_name`/`version` - // combination exists. - let row = rows.get(0).unwrap(); - - row.get::<_, bool>(0) - }; - - // try to get actual file first - // skip if request is a directory - let blob = if !file_path.ends_with('/') { - storage - .fetch_source_file(crate_name, &version, &file_path, archive_storage) - .ok() - } else { - None - }; - - let (file, file_content) = if let Some(blob) = blob { - let is_text = blob.mime.starts_with("text") || blob.mime == "application/json"; - // serve the file with DatabaseFileHandler if file isn't text and not empty - if !is_text && !blob.is_empty() { - return Ok(DbFile(blob).serve()); - } else if is_text && !blob.is_empty() { - let path = blob - .path - .rsplit_once('/') - .map(|(_, path)| path) - .unwrap_or(&blob.path); - ( - Some(File::from_path_and_mime(path, &blob.mime)), - String::from_utf8(blob.content).ok(), - ) - } else { - (None, None) - } - } else { - (None, None) + req_path.join("/") }; - let file_list = FileList::from_path( - &mut conn, - crate_name, - &version, - &version_or_latest, - &req_path, - ) - .ok_or(Nope::ResourceNotFound)?; - let mut response = SourcePage { - file_list, - show_parent_link: !req_path.is_empty(), - file, - file_content, - canonical_url, - } - .into_response(req)?; - response.extensions.insert::(if is_latest_url { - CachePolicy::ForeverInCdn - } else { - CachePolicy::ForeverInCdnAndStaleInBrowser - }); - Ok(response) + let url = ctry!(req, + Url::parse(&format!("https://sourcegraph.com/crates/{crate_name}@v{version}/-/blob/{file_path}"))); + return Ok(super::redirect(url)); } #[cfg(test)] @@ -326,92 +69,6 @@ mod tests { use crate::web::cache::CachePolicy; use test_case::test_case; - #[test_case(true)] - #[test_case(false)] - fn fetch_source_file_content(archive_storage: bool) { - wrapper(|env| { - env.fake_release() - .archive_storage(archive_storage) - .name("fake") - .version("0.1.0") - .source_file("some_filename.rs", b"some_random_content") - .create()?; - let web = env.frontend(); - assert_success_cached( - "/crate/fake/0.1.0/source/", - web, - CachePolicy::ForeverInCdnAndStaleInBrowser, - &env.config(), - )?; - let response = web - .get("/crate/fake/0.1.0/source/some_filename.rs") - .send()?; - assert!(response.status().is_success()); - assert_cache_control( - &response, - CachePolicy::ForeverInCdnAndStaleInBrowser, - &env.config(), - ); - assert!(response.text()?.contains("some_random_content")); - Ok(()) - }); - } - - #[test_case(true)] - #[test_case(false)] - fn cargo_ok_not_skipped(archive_storage: bool) { - wrapper(|env| { - env.fake_release() - .archive_storage(archive_storage) - .name("fake") - .version("0.1.0") - .source_file(".cargo-ok", b"ok") - .source_file("README.md", b"hello") - .create()?; - let web = env.frontend(); - assert_success("/crate/fake/0.1.0/source/", web)?; - Ok(()) - }); - } - - #[test] - fn latest_contains_links_to_latest() { - wrapper(|env| { - env.fake_release() - .archive_storage(true) - .name("fake") - .version("0.1.0") - .source_file(".cargo-ok", b"ok") - .source_file("README.md", b"hello") - .create()?; - let resp = env.frontend().get("/crate/fake/latest/source/").send()?; - assert_cache_control(&resp, CachePolicy::ForeverInCdn, &env.config()); - assert!(resp.url().as_str().ends_with("/crate/fake/latest/source/")); - let body = String::from_utf8(resp.bytes().unwrap().to_vec()).unwrap(); - assert!(body.contains(""#)); - - let response = web - .get("/crate/fake/0.1.0/source/Cargo.lock") - .send()? - .text()?; - assert!(response.contains(r#""#)); - - Ok(()) - }); - } - - #[test] - fn dotfiles_with_extension_are_highlighted() { - wrapper(|env| { - env.fake_release() - .name("fake") - .version("0.1.0") - .source_file(".rustfmt.toml", b"[rustfmt]") - .create()?; - - let web = env.frontend(); - - let response = web - .get("/crate/fake/0.1.0/source/.rustfmt.toml") - .send()? - .text()?; - assert!(response.contains(r#""#)); - - Ok(()) - }); - } - - #[test] - fn json_is_served_as_rendered_html() { - wrapper(|env| { - env.fake_release() - .name("fake") - .version("0.1.0") - .source_file("config.json", b"{}") - .create()?; - - let web = env.frontend(); - - let response = web.get("/crate/fake/0.1.0/source/config.json").send()?; - assert!(response - .headers() - .get("content-type") - .unwrap() - .to_str() - .unwrap() - .starts_with("text/html")); - assert!(response.text()?.starts_with(r#""#)); - - Ok(()) - }); - } }