Skip to content

Commit

Permalink
Use a standard HashMap instead of one based on SHA1
Browse files Browse the repository at this point in the history
It's faster throughout the board.

```
❯ cargo bench -p [email protected] --bench edit-tree
   Compiling gix-object v0.44.0 (/Users/byron/dev/github.com/Byron/gitoxide/gix-object)
   Compiling gix-pack v0.53.0 (/Users/byron/dev/github.com/Byron/gitoxide/gix-pack)
   Compiling gix-odb v0.63.0 (/Users/byron/dev/github.com/Byron/gitoxide/gix-odb)
    Finished `bench` profile [optimized] target(s) in 5.97s
     Running benches/edit_tree.rs (target/release/deps/edit_tree-6af6651a1c453a05)
Gnuplot not found, using plotters backend
editor/small tree (empty -> full -> empty)
                        time:   [2.5972 µs 2.6019 µs 2.6075 µs]
                        thrpt:  [3.8351 Melem/s 3.8434 Melem/s 3.8503 Melem/s]
                 change:
                        time:   [-32.618% -32.355% -32.038%] (p = 0.00 < 0.05)
                        thrpt:  [+47.142% +47.831% +48.409%]
                        Performance has improved.
Found 14 outliers among 100 measurements (14.00%)
  13 (13.00%) high mild
  1 (1.00%) high severe
editor/deeply nested tree (empty -> full -> empty)
                        time:   [8.2019 µs 8.2079 µs 8.2145 µs]
                        thrpt:  [5.5998 Melem/s 5.6043 Melem/s 5.6084 Melem/s]
                 change:
                        time:   [-33.517% -33.377% -33.246%] (p = 0.00 < 0.05)
                        thrpt:  [+49.804% +50.099% +50.415%]
                        Performance has improved.
Found 13 outliers among 100 measurements (13.00%)
  8 (8.00%) high mild
  5 (5.00%) high severe

cursor/small tree (empty -> full -> empty)
                        time:   [2.6911 µs 2.6935 µs 2.6961 µs]
                        thrpt:  [3.7090 Melem/s 3.7127 Melem/s 3.7160 Melem/s]
                 change:
                        time:   [-33.881% -33.546% -33.225%] (p = 0.00 < 0.05)
                        thrpt:  [+49.757% +50.480% +51.242%]
                        Performance has improved.
Found 14 outliers among 100 measurements (14.00%)
  4 (4.00%) high mild
  10 (10.00%) high severe
cursor/deeply nested tree (empty -> full -> empty)
                        time:   [1.3616 µs 1.3631 µs 1.3649 µs]
                        thrpt:  [33.703 Melem/s 33.747 Melem/s 33.783 Melem/s]
                 change:
                        time:   [-40.063% -39.675% -39.234%] (p = 0.00 < 0.05)
                        thrpt:  [+64.566% +65.769% +66.843%]
                        Performance has improved.
Found 20 outliers among 100 measurements (20.00%)
  18 (18.00%) high mild
  2 (2.00%) high severe
```
  • Loading branch information
Byron committed Sep 5, 2024
1 parent b5dc45f commit 39d8e03
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 22 deletions.
26 changes: 12 additions & 14 deletions gix-object/src/tree/editor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use crate::tree::{Editor, EntryKind};
use crate::{tree, Tree};
use bstr::{BStr, BString, ByteSlice, ByteVec};
use gix_hash::ObjectId;
use gix_hashtable::hash_map::Entry;
use std::cmp::Ordering;
use std::collections::{hash_map, HashMap};
use std::fmt::Formatter;

/// A way to constrain all [tree-edits](Editor) to a given subtree.
Expand Down Expand Up @@ -45,7 +45,7 @@ impl<'a> Editor<'a> {
Editor {
find,
object_hash,
trees: gix_hashtable::HashMap::from_iter(Some((empty_path_hash(), root))),
trees: HashMap::from_iter(Some((empty_path(), root))),
path_buf: Vec::with_capacity(256).into(),
tree_buf: Vec::with_capacity(512),
}
Expand Down Expand Up @@ -160,7 +160,7 @@ impl<'a> Editor<'a> {
}
Err(err) => {
let root_tree = parents.into_iter().next().expect("root wasn't consumed yet");
self.trees.insert(path_hash(&root_tree.1), root_tree.2);
self.trees.insert(root_tree.1, root_tree.2);
return Err(err);
}
}
Expand All @@ -179,11 +179,11 @@ impl<'a> Editor<'a> {
}
WriteMode::FromCursor => {}
}
self.trees.insert(path_hash(&rela_path), tree);
self.trees.insert(rela_path, tree);
return Ok(root_tree_id);
}
Err(err) => {
self.trees.insert(path_hash(&rela_path), tree);
self.trees.insert(rela_path, tree);
return Err(err);
}
}
Expand Down Expand Up @@ -297,8 +297,8 @@ impl<'a> Editor<'a> {
push_path_component(&mut self.path_buf, name);
let path_id = path_hash(&self.path_buf);
cursor = match self.trees.entry(path_id) {
Entry::Occupied(e) => e.into_mut(),
Entry::Vacant(e) => e.insert(
hash_map::Entry::Occupied(e) => e.into_mut(),
hash_map::Entry::Vacant(e) => e.insert(
if let Some(tree_id) = tree_to_lookup.filter(|tree_id| !tree_id.is_empty_tree()) {
self.find.find_tree(&tree_id, &mut self.tree_buf)?.into()
} else {
Expand All @@ -317,7 +317,7 @@ impl<'a> Editor<'a> {
/// This is useful if the same editor is re-used for various trees.
pub fn set_root(&mut self, root: Tree) -> &mut Self {
self.trees.clear();
self.trees.insert(empty_path_hash(), root);
self.trees.insert(empty_path(), root);
self
}
}
Expand Down Expand Up @@ -420,14 +420,12 @@ fn filename(path: &BStr) -> &BStr {
path.rfind_byte(b'/').map_or(path, |pos| &path[pos + 1..])
}

fn empty_path_hash() -> ObjectId {
gix_features::hash::hasher(gix_hash::Kind::Sha1).digest().into()
fn empty_path() -> BString {
BString::default()
}

fn path_hash(path: &[u8]) -> ObjectId {
let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1);
hasher.update(path);
hasher.digest().into()
fn path_hash(path: &[u8]) -> BString {
path.to_vec().into()
}

fn push_path_component(base: &mut BString, component: &[u8]) -> usize {
Expand Down
9 changes: 1 addition & 8 deletions gix-object/src/tree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use crate::{
bstr::{BStr, BString},
tree, Tree,
};
use gix_hash::ObjectId;
use std::cmp::Ordering;

///
Expand All @@ -19,12 +18,6 @@ pub mod write;
///
/// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well
/// with some penalties.
///
/// ### Note
///
/// For reasons of efficiency, internally a SHA1 based hashmap is used to avoid having to store full paths
/// to each edited tree. The chance of collision is low, but could be engineered to overwrite or write into
/// an unintended tree.
#[doc(alias = "TreeUpdateBuilder", alias = "git2")]
#[derive(Clone)]
pub struct Editor<'a> {
Expand All @@ -35,7 +28,7 @@ pub struct Editor<'a> {
/// All trees we currently hold in memory. Each of these may change while adding and removing entries.
/// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be
/// dropped when writing at the latest.
trees: gix_hashtable::HashMap<ObjectId, Tree>,
trees: std::collections::HashMap<BString, Tree>,
/// A buffer to build up paths when finding the tree to edit.
path_buf: BString,
/// Our buffer for storing tree-data in, right before decoding it.
Expand Down

0 comments on commit 39d8e03

Please sign in to comment.