diff --git a/Cargo.lock b/Cargo.lock
index c4c7d061430..fadad33adf5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2115,6 +2115,7 @@ dependencies = [
  "gix-features 0.38.2",
  "gix-fs 0.11.3",
  "gix-hash 0.14.2",
+ "gix-hashtable 0.5.2",
  "gix-object 0.44.0",
  "gix-pack",
  "gix-path 0.10.10",
diff --git a/gix-odb/Cargo.toml b/gix-odb/Cargo.toml
index ce87448814c..bad6f16033b 100644
--- a/gix-odb/Cargo.toml
+++ b/gix-odb/Cargo.toml
@@ -21,6 +21,7 @@ serde = ["dep:serde", "gix-hash/serde", "gix-object/serde", "gix-pack/serde"]
 
 [dependencies]
 gix-features = { version = "^0.38.2", path = "../gix-features", features = ["rustsha1", "walkdir", "zlib", "crc32"] }
+gix-hashtable = { version = "^0.5.2", path = "../gix-hashtable" }
 gix-hash = { version = "^0.14.2", path = "../gix-hash" }
 gix-date = { version = "^0.9.0", path = "../gix-date" }
 gix-path = { version = "^0.10.10", path = "../gix-path" }
diff --git a/gix-odb/src/lib.rs b/gix-odb/src/lib.rs
index 40d7c2aea90..aaedc74a175 100644
--- a/gix-odb/src/lib.rs
+++ b/gix-odb/src/lib.rs
@@ -66,6 +66,9 @@ pub fn sink(object_hash: gix_hash::Kind) -> Sink {
     }
 }
 
+///
+pub mod memory;
+
 mod sink;
 
 ///
diff --git a/gix-odb/src/memory.rs b/gix-odb/src/memory.rs
new file mode 100644
index 00000000000..6bdd6e0c9c5
--- /dev/null
+++ b/gix-odb/src/memory.rs
@@ -0,0 +1,131 @@
+use crate::find::Header;
+use gix_object::Data;
+use std::cell::RefCell;
+use std::ops::{Deref, DerefMut};
+
+/// An object database to read from any implementation but write to memory.
+/// Previously written objects can be returned from memory upon query which
+/// makes the view of objects consistent, but its impact is temporary unless
+/// [`memory objects`](Proxy::memory) are persisted in a separate step.
+///
+/// It's possible to turn off the memory by removing it from the instance.
+pub struct Proxy<T> {
+    /// The actual odb implementation
+    inner: T,
+    /// The kind of hash to produce when writing new objects.
+    object_hash: gix_hash::Kind,
+    /// The storage for in-memory objects.
+    /// If `None`, the proxy will always read from and write-through to `inner`.
+    pub memory: Option<RefCell<Storage>>,
+}
+
+/// Lifecycle
+impl<T> Proxy<T> {
+    /// Create a new instance using `odb` as actual object provider, with an empty in-memory store for
+    /// objects that are to be written.
+    /// Use `object_hash` to determine the kind of hash to produce when writing new objects.
+    pub fn new(odb: T, object_hash: gix_hash::Kind) -> Proxy<T> {
+        Proxy {
+            inner: odb,
+            object_hash,
+            memory: Some(Default::default()),
+        }
+    }
+}
+
+impl<T> gix_object::Find for Proxy<T>
+where
+    T: gix_object::Find,
+{
+    fn try_find<'a>(
+        &self,
+        id: &gix_hash::oid,
+        buffer: &'a mut Vec<u8>,
+    ) -> Result<Option<Data<'a>>, gix_object::find::Error> {
+        if let Some(map) = self.memory.as_ref() {
+            let map = map.borrow();
+            if let Some((kind, data)) = map.get(id) {
+                buffer.clear();
+                buffer.extend_from_slice(data);
+                return Ok(Some(Data {
+                    kind: *kind,
+                    data: &*buffer,
+                }));
+            }
+        }
+        self.inner.try_find(id, buffer)
+    }
+}
+
+impl<T> crate::Header for Proxy<T>
+where
+    T: crate::Header,
+{
+    fn try_header(&self, id: &gix_hash::oid) -> Result<Option<Header>, gix_object::find::Error> {
+        if let Some(map) = self.memory.as_ref() {
+            let map = map.borrow();
+            if let Some((kind, data)) = map.get(id) {
+                return Ok(Some(Header::Loose {
+                    kind: *kind,
+                    size: data.len() as u64,
+                }));
+            }
+        }
+        self.inner.try_header(id)
+    }
+}
+
+impl<T> crate::Write for Proxy<T>
+where
+    T: crate::Write,
+{
+    fn write_stream(
+        &self,
+        kind: gix_object::Kind,
+        size: u64,
+        from: &mut dyn std::io::Read,
+    ) -> Result<gix_hash::ObjectId, crate::write::Error> {
+        let Some(map) = self.memory.as_ref() else {
+            return self.inner.write_stream(kind, size, from);
+        };
+
+        let mut buf = Vec::new();
+        from.read_to_end(&mut buf)?;
+
+        let id = gix_object::compute_hash(self.object_hash, kind, &buf);
+        map.borrow_mut().insert(id, (kind, buf));
+        Ok(id)
+    }
+}
+
+impl<T> Deref for Proxy<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl<T> DerefMut for Proxy<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.inner
+    }
+}
+
+/// A mapping between an object id and all data corresponding to an object, acting like a `HashMap`.
+#[derive(Default, Debug, Clone, Eq, PartialEq)]
+pub struct Storage(gix_hashtable::HashMap<gix_hash::ObjectId, (gix_object::Kind, Vec<u8>)>);
+
+impl Deref for Storage {
+    type Target = gix_hashtable::HashMap<gix_hash::ObjectId, (gix_object::Kind, Vec<u8>)>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl DerefMut for Storage {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
diff --git a/gix-odb/tests/odb/memory.rs b/gix-odb/tests/odb/memory.rs
new file mode 100644
index 00000000000..7e984edc970
--- /dev/null
+++ b/gix-odb/tests/odb/memory.rs
@@ -0,0 +1,98 @@
+use crate::odb::hex_to_id;
+use gix_object::{tree, FindExt};
+use gix_odb::{Header, HeaderExt, Write};
+use gix_testtools::tempfile::TempDir;
+
+#[test]
+fn without_memory() -> crate::Result {
+    let (mut odb, _tmp) = db_rw()?;
+    let mut buf = Vec::new();
+    let mem = odb.memory.take().expect("it starts out with memory set").into_inner();
+    assert_eq!(mem.len(), 0, "no object is stored initially");
+    let existing = hex_to_id("21d3ba9a26b790a4858d67754ae05d04dfce4d0c");
+    let tree = odb.find_tree(&existing, &mut buf).expect("present and valid");
+    assert_eq!(tree.entries.len(), 1);
+    odb.header(existing).expect("header can be found just the same");
+
+    let mut tree = tree.to_owned();
+    tree.entries.push(tree::Entry {
+        mode: tree::EntryKind::Blob.into(),
+        filename: "z-for-sorting_another-file-with-same-content".into(),
+        oid: existing,
+    });
+    let new_tree_id = odb.write(&tree)?;
+    assert_eq!(new_tree_id, hex_to_id("249b0b4106a5e9e7875e446a26468e22ec47a05c"));
+    let actual = odb.header(new_tree_id).expect("header of new objects can be found");
+    assert_eq!(actual.kind(), gix_object::Kind::Tree);
+    assert_eq!(actual.size(), 104);
+
+    let new_tree = odb
+        .find_tree(&new_tree_id, &mut buf)
+        .expect("new tree is also available as object")
+        .to_owned();
+    assert_eq!(new_tree, tree);
+
+    Ok(())
+}
+
+#[test]
+fn with_memory() -> crate::Result {
+    let mut odb = db()?;
+    assert_eq!(
+        (*odb).iter()?.count(),
+        6,
+        "let's be sure we didn't accidentally write anything"
+    );
+    let mut buf = Vec::new();
+    let existing = hex_to_id("21d3ba9a26b790a4858d67754ae05d04dfce4d0c");
+    let tree = odb.find_tree(&existing, &mut buf).expect("present and valid");
+    assert_eq!(tree.entries.len(), 1);
+    odb.header(existing).expect("header can be found just the same");
+    assert_eq!(
+        odb.memory.as_ref().unwrap().borrow().len(),
+        0,
+        "nothing is stored when fetching objects - it's not an object cache"
+    );
+
+    let mut tree = tree.to_owned();
+    tree.entries.push(tree::Entry {
+        mode: tree::EntryKind::Blob.into(),
+        filename: "z-for-sorting_another-file-with-same-content".into(),
+        oid: existing,
+    });
+    let new_tree_id = odb.write(&tree)?;
+    assert_eq!(new_tree_id, hex_to_id("249b0b4106a5e9e7875e446a26468e22ec47a05c"));
+    let actual = odb.header(new_tree_id).expect("header of new objects can be found");
+    assert_eq!(actual.kind(), gix_object::Kind::Tree);
+    assert_eq!(actual.size(), 104);
+
+    let new_tree = odb
+        .find_tree(&new_tree_id, &mut buf)
+        .expect("new tree is also available as object")
+        .to_owned();
+    assert_eq!(new_tree, tree);
+
+    let mem = odb.memory.take().expect("memory is still available").into_inner();
+    assert_eq!(mem.len(), 1, "one new object was just written");
+
+    assert_eq!(
+        odb.try_header(&new_tree_id)?,
+        None,
+        "without memory, the object can't be found anymore"
+    );
+
+    Ok(())
+}
+
+fn db() -> crate::Result<gix_odb::memory::Proxy<gix_odb::Handle>> {
+    let odb = gix_odb::at(
+        gix_testtools::scripted_fixture_read_only_standalone("repo_with_loose_objects.sh")?.join(".git/objects"),
+    )?;
+    Ok(gix_odb::memory::Proxy::new(odb, gix_hash::Kind::Sha1))
+}
+
+fn db_rw() -> crate::Result<(gix_odb::memory::Proxy<gix_odb::Handle>, TempDir)> {
+    let tmp = gix_testtools::scripted_fixture_writable_standalone("repo_with_loose_objects.sh")?;
+    let odb = gix_odb::at(tmp.path().join(".git/objects"))?;
+    Ok((gix_odb::memory::Proxy::new(odb, gix_hash::Kind::Sha1), tmp))
+}
diff --git a/gix-odb/tests/odb/mod.rs b/gix-odb/tests/odb/mod.rs
index 6e4b1506bae..353bb78ee66 100644
--- a/gix-odb/tests/odb/mod.rs
+++ b/gix-odb/tests/odb/mod.rs
@@ -18,6 +18,7 @@ fn db_small_packs() -> gix_odb::Handle {
 pub mod alternate;
 pub mod find;
 pub mod header;
+pub mod memory;
 pub mod regression;
 pub mod sink;
 pub mod store;