Skip to content

Commit

Permalink
Use simdutf8 for interacting with files within Git
Browse files Browse the repository at this point in the history
  • Loading branch information
w4 committed Sep 28, 2024
1 parent a3ead79 commit 0fad4f4
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 14 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ rand = "0.8.5"
rocksdb = { version = "0.22", default-features = false, features = ["snappy"] }
rust-ini = "0.21.1"
serde = { version = "1.0", features = ["derive", "rc"] }
simdutf8 = "0.1.5"
syntect = "5"
tar = "0.4"
time = { version = "0.3", features = ["serde"] }
Expand Down
39 changes: 25 additions & 14 deletions src/git.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{
borrow::Cow,
collections::{BTreeMap, VecDeque},
collections::VecDeque,
ffi::OsStr,
fmt::{self, Arguments, Write},
io::ErrorKind,
Expand Down Expand Up @@ -157,15 +157,17 @@ impl OpenRepository {
.or_else(|| path.file_name())
.map_or_else(|| Cow::Borrowed(""), OsStr::to_string_lossy);

let content = match (formatted, String::from_utf8(blob.take_data())) {
let content = match (formatted, simdutf8::basic::from_utf8(&blob.data)) {
(true, Err(_)) => Content::Binary(vec![]),
(true, Ok(data)) => Content::Text(Cow::Owned(format_file(
&data,
data,
&extension,
&self.git.syntax_set,
)?)),
(false, Err(e)) => Content::Binary(e.into_bytes()),
(false, Ok(data)) => Content::Text(Cow::Owned(data)),
(false, Err(_)) => Content::Binary(blob.take_data()),
(false, Ok(_data)) => Content::Text(Cow::Owned(unsafe {
String::from_utf8_unchecked(blob.take_data())
})),
};

return Ok(PathDestination::File(FileWithContent {
Expand Down Expand Up @@ -295,7 +297,7 @@ impl OpenRepository {
continue;
};

let Ok(content) = std::str::from_utf8(&blob.data) else {
let Ok(content) = simdutf8::basic::from_utf8(&blob.data) else {
continue;
};

Expand Down Expand Up @@ -757,7 +759,7 @@ fn fetch_diff_and_stats(
.transpose()?
.unwrap_or_else(|| repo.empty_tree());

let mut diffs = BTreeMap::<_, FileDiff>::new();
let mut diffs = Vec::new();
let mut diff_output = String::new();

let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?;
Expand Down Expand Up @@ -795,9 +797,9 @@ fn fetch_diff_and_stats(
diffs.iter().fold(
(0, 0, 0, 0, 0),
|(max_file_name_length, max_change_length, files_changed, insertions, deletions),
(f, stats)| {
stats| {
(
max_file_name_length.max(f.len()),
max_file_name_length.max(stats.path.len()),
max_change_length
.max(((stats.insertions + stats.deletions).ilog10() + 1) as usize),
files_changed + 1,
Expand All @@ -811,7 +813,7 @@ fn fetch_diff_and_stats(

let total_changes = insertions + deletions;

for (file, diff) in &diffs {
for diff in &diffs {
let local_changes = diff.insertions + diff.deletions;
let width = WIDTH.min(local_changes);

Expand All @@ -829,6 +831,7 @@ fn fetch_diff_and_stats(
let plus_str = "+".repeat(adjusted_addition_width);
let minus_str = "-".repeat(adjusted_deletion_width);

let file = diff.path.as_str();
writeln!(diff_stats, " {file:max_file_name_length$} | {local_changes:max_change_length$} {plus_str}{minus_str}").unwrap();
}

Expand Down Expand Up @@ -864,6 +867,7 @@ fn fetch_diff_and_stats(

/// Per-file change summary gathered while a diff is being built.
///
/// `DiffBuilder::handle` creates one of these for each change it processes
/// (with both counters starting at zero) and pushes it onto its `diffs`
/// vector. The diffstat rendering in `fetch_diff_and_stats` then reads
/// `path`, `insertions` and `deletions` to size and print each summary row.
#[derive(Default, Debug)]
struct FileDiff {
/// Path of the changed file, taken from the change's `location`.
path: String,
/// Number of insertions recorded for this file.
/// NOTE(review): presumably counted in lines by the diff callback; the
/// code that increments it is not visible here, so confirm.
insertions: usize,
/// Number of deletions recorded for this file.
/// NOTE(review): presumably counted in lines, same caveat as `insertions`.
deletions: usize,
}
Expand Down Expand Up @@ -1039,11 +1043,12 @@ trait DiffFormatter {
struct DiffBuilder<'a, F> {
output: &'a mut String,
resource_cache: &'a mut gix::diff::blob::Platform,
diffs: &'a mut BTreeMap<String, FileDiff>,
diffs: &'a mut Vec<FileDiff>,
formatter: F,
}

impl<'a, F: DiffFormatter + Callback> DiffBuilder<'a, F> {
#[allow(clippy::too_many_lines)]
fn handle(
&mut self,
change: gix::object::tree::diff::Change<'_, '_, '_>,
Expand All @@ -1052,7 +1057,11 @@ impl<'a, F: DiffFormatter + Callback> DiffBuilder<'a, F> {
return Ok(gix::object::tree::diff::Action::Continue);
}

let diff = self.diffs.entry(change.location.to_string()).or_default();
let mut diff = FileDiff {
path: change.location.to_string(),
insertions: 0,
deletions: 0,
};
let change = change.diff(self.resource_cache)?;

let prep = change.resource_cache.prepare_diff()?;
Expand Down Expand Up @@ -1129,10 +1138,10 @@ impl<'a, F: DiffFormatter + Callback> DiffBuilder<'a, F> {
.file_header(self.output, format_args!("+++ {new_path}"));

let old_source = gix::diff::blob::sources::lines_with_terminator(
std::str::from_utf8(prep.old.data.as_slice().unwrap_or_default())?,
simdutf8::basic::from_utf8(prep.old.data.as_slice().unwrap_or_default())?,
);
let new_source = gix::diff::blob::sources::lines_with_terminator(
std::str::from_utf8(prep.new.data.as_slice().unwrap_or_default())?,
simdutf8::basic::from_utf8(prep.new.data.as_slice().unwrap_or_default())?,
);
let input = gix::diff::blob::intern::InternedInput::new(old_source, new_source);

Expand Down Expand Up @@ -1166,6 +1175,8 @@ impl<'a, F: DiffFormatter + Callback> DiffBuilder<'a, F> {
}
}

self.diffs.push(diff);

self.resource_cache.clear_resource_cache_keep_allocation();
Ok(gix::object::tree::diff::Action::Continue)
}
Expand Down

0 comments on commit 0fad4f4

Please sign in to comment.