From 26703dce1a8bfecdce5ee1a9618d859d1ca17387 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sat, 28 Sep 2024 16:48:19 +0400 Subject: [PATCH] Cache open repositories to reduce IO & move to zlib-ng --- Cargo.lock | 88 +++++++++++++++++++++++++++++++++ Cargo.toml | 5 +- flake.nix | 6 +++ src/git.rs | 42 ++++++++++------ src/main.rs | 4 +- src/methods/filters.rs | 31 ++++++++++-- templates/repo/macros/refs.html | 6 +-- 7 files changed, 156 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d2b63e4..4fe0f20 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -410,6 +410,15 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bstr" version = "1.10.0" @@ -568,6 +577,15 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cbd0f76e066e64fdc5631e3bb46381254deab9ef1158292f27c8c57e3bf3fe59" +[[package]] +name = "cmake" +version = "0.1.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" +dependencies = [ + "cc", +] + [[package]] name = "colorchoice" version = "1.0.2" @@ -767,6 +785,16 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "darling" version = "0.20.10" @@ -863,6 +891,16 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "339544cc9e2c4dc3fc7149fd630c5f22263a4fdf18a98afd0075784968b5cf00" +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "dlv-list" version = "0.5.2" @@ -983,6 +1021,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" dependencies = [ "crc32fast", + "libz-ng-sys", "miniz_oxide", ] @@ -1090,6 +1129,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -1391,6 +1440,7 @@ dependencies = [ "once_cell", "parking_lot", "prodash", + "sha1", "sha1_smol", "thiserror", "walkdir", @@ -2321,6 +2371,16 @@ dependencies = [ "libz-sys", ] +[[package]] +name = "libz-ng-sys" +version = "1.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4436751a01da56f1277f323c80d584ffad94a3d14aecd959dd0dff75aa73a438" +dependencies = [ + "cmake", + "libc", +] + [[package]] name = "libz-sys" version = "1.1.20" @@ -2928,6 +2988,7 @@ name = "rgit" version = "0.1.3" dependencies = [ "anyhow", + "arc-swap", "askama", "axum", "axum-macros", @@ -3144,6 +3205,27 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", + "sha1-asm", +] + +[[package]] +name = "sha1-asm" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "286acebaf8b67c1130aedffad26f594eff0c1292389158135327d2e23aed582b" +dependencies = [ + "cc", +] + [[package]] name = "sha1_smol" version = "1.0.1" @@ -3672,6 +3754,12 @@ version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + [[package]] name = "uluru" version = "3.1.0" diff --git a/Cargo.toml b/Cargo.toml index 1c914a2..2cbeaeb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ license = "WTFPL" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +arc-swap = "1.7" anyhow = "1.0" askama = "0.12.0" axum = { version = "0.7", features = ["macros"] } @@ -23,9 +24,9 @@ const-hex = "1.12" const_format = "0.2" comrak = "0.28.0" console-subscriber = { version = "0.4", features = ["parking_lot"] } -flate2 = "1.0" +flate2 = { version = "1.0", default-features = false, features = ["zlib-ng"] } futures = "0.3" -gix = "0.66" +gix = { version = "0.66", features = ["fast-sha1", "zlib-ng"] } httparse = "1.7" humantime = "2.1" itertools = "0.13.0" diff --git a/flake.nix b/flake.nix index 81ef934..6e39f8a 100644 --- a/flake.nix +++ b/flake.nix @@ -20,6 +20,7 @@ inherit src; strictDeps = true; buildInputs = pkgs.lib.optionals pkgs.stdenv.isDarwin [ pkgs.libiconv ]; + nativeBuildInputs = [ pkgs.cmake ]; LIBCLANG_PATH = "${pkgs.clang.cc.lib}/lib"; ROCKSDB_LIB_DIR = "${pkgs.rocksdb}/lib"; }; @@ -95,6 +96,11 @@ description = "Timeout for incoming HTTP requests"; type = types.str; }; + pkg = mkOption { + default = rgit; + description = "rgit package to use"; + type = types.package; + }; }; config = mkIf cfg.enable { diff --git a/src/git.rs b/src/git.rs index e7c5a1b..b7bc468 100644 --- a/src/git.rs +++ b/src/git.rs @@ -3,6 +3,7 @@ use std::{ collections::{BTreeMap, VecDeque}, ffi::OsStr, fmt::{self, Arguments, Write}, + io::ErrorKind, path::{Path, PathBuf}, str::FromStr, sync::Arc, @@ -43,6 +44,7 @@ type ReadmeCacheKey = (PathBuf, Option>); pub struct Git { commits: Cache<(ObjectId, bool), Arc>, readme_cache: Cache)>>, + open_repositories: Cache, syntax_set: SyntaxSet, } @@ -51,11 +53,15 @@ impl Git { pub fn new(syntax_set: SyntaxSet) -> Self { Self { commits: Cache::builder() - .time_to_live(Duration::from_secs(10)) + .time_to_live(Duration::from_secs(30)) .max_capacity(100) .build(), readme_cache: Cache::builder() - .time_to_live(Duration::from_secs(10)) + .time_to_live(Duration::from_secs(30)) + .max_capacity(100) + .build(), + open_repositories: Cache::builder() + .time_to_idle(Duration::from_secs(120)) .max_capacity(100) .build(), syntax_set, @@ -70,20 +76,24 @@ impl Git { repo_path: PathBuf, branch: Option>, ) -> Result> { - let repo = tokio::task::spawn_blocking({ - let repo_path = repo_path.clone(); - move || { - gix::open::Options::isolated() - .open_path_as_is(true) - .open(&repo_path) - } - }) - .await - .context("Failed to join Tokio task")? - .map_err(|err| { - error!("{}", err); - anyhow!("Failed to open repository") - })?; + let repo = repo_path.clone(); + let repo = self + .open_repositories + .try_get_with_by_ref(&repo_path, async move { + tokio::task::spawn_blocking(move || { + gix::open::Options::isolated() + .open_path_as_is(true) + .open(&repo) + }) + .await + .context("Failed to join Tokio task") + .map_err(|e| std::io::Error::new(ErrorKind::Other, e))? + .map_err(|err| { + error!("{}", err); + std::io::Error::new(ErrorKind::Other, "Failed to open repository") + }) + }) + .await?; Ok(Arc::new(OpenRepository { git: self, diff --git a/src/main.rs b/src/main.rs index 0b6d1fc..566cf86 100644 --- a/src/main.rs +++ b/src/main.rs @@ -328,8 +328,8 @@ async fn run_indexer( #[must_use] pub fn build_asset_hash(v: &[u8]) -> Box { - let hasher = xxhash_rust::const_xxh3::xxh3_128(v); - let out = const_hex::encode(&hasher.to_be_bytes()); + let hasher = const_xxh3::xxh3_128(v); + let out = const_hex::encode(hasher.to_be_bytes()); Box::from(out) } diff --git a/src/methods/filters.rs b/src/methods/filters.rs index 37368a4..cba8add 100644 --- a/src/methods/filters.rs +++ b/src/methods/filters.rs @@ -1,8 +1,13 @@ // sorry clippy, we don't have a choice. askama forces this on us #![allow(clippy::unnecessary_wraps, clippy::trivially_copy_pass_by_ref)] -use std::borrow::Borrow; +use std::{ + borrow::Borrow, + collections::HashMap, + sync::{Arc, LazyLock}, +}; +use arc_swap::ArcSwap; use time::format_description::well_known::Rfc3339; pub fn format_time(s: impl Borrow) -> Result { @@ -25,8 +30,28 @@ pub fn hex(s: &[u8]) -> Result { Ok(const_hex::encode(s)) } -pub fn md5(s: &str) -> Result { - Ok(const_hex::encode(md5::compute(s).0)) +pub fn gravatar(email: &str) -> Result<&'static str, askama::Error> { + static CACHE: LazyLock>> = + LazyLock::new(|| ArcSwap::new(Arc::new(HashMap::new()))); + + if let Some(res) = CACHE.load().get(email).copied() { + return Ok(res); + } + + let url = format!( + "https://www.gravatar.com/avatar/{}", + const_hex::encode(md5::compute(email).0) + ); + let key = Box::leak(Box::from(email)); + let url = url.leak(); + + CACHE.rcu(|curr| { + let mut r = (**curr).clone(); + r.insert(key, url); + r + }); + + Ok(url) } #[allow(dead_code)] diff --git a/templates/repo/macros/refs.html b/templates/repo/macros/refs.html index 51128da..a411e91 100644 --- a/templates/repo/macros/refs.html +++ b/templates/repo/macros/refs.html @@ -14,7 +14,7 @@ {{ name }} {{ commit.get().summary }} - + {{ commit.get().author.name }} @@ -44,7 +44,7 @@ {{- name -}}.tar.gz {% if let Some(tagger) = tag.get().tagger -%} - + {{ tagger.name }} {%- endif %} @@ -80,7 +80,7 @@ {{ commit.summary }} - + {{ commit.author.name }}