Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --max-syntax-highlighting-length, set to 400 #1746

Merged
merged 2 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 90 additions & 20 deletions src/ansi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,19 @@ pub fn measure_text_width(s: &str) -> usize {
})
}

/// Truncate string such that `tail` is present as a suffix, preceded by as much of `s` as can be
/// displayed in the requested width.
// Return string constructed as follows:
// 1. `display_width` characters are available. If the string fits, return it.
//
// 2. Contribute graphemes and ANSI escape sequences from `tail` until either (1) `tail` is
// exhausted, or (2) the display width of the result would exceed `display_width`.
//
// 3. If tail was exhausted, then contribute graphemes and ANSI escape sequences from `s` until the
// display_width of the result would exceed `display_width`.
pub fn truncate_str<'a>(s: &'a str, display_width: usize, tail: &str) -> Cow<'a, str> {
fn truncate_str_impl<'a>(
s: &'a str,
display_width: usize,
tail: &str,
fill2w: Option<char>,
) -> Cow<'a, str> {
let items = ansi_strings_iterator(s).collect::<Vec<(&str, bool)>>();
let width = strip_ansi_codes_from_strings_iterator(items.iter().copied()).width();
if width <= display_width {
return Cow::from(s);
}
let result_tail = if !tail.is_empty() {
truncate_str(tail, display_width, "").to_string()
truncate_str_impl(tail, display_width, "", fill2w).to_string()
} else {
String::new()
};
Expand All @@ -51,20 +46,60 @@ pub fn truncate_str<'a>(s: &'a str, display_width: usize, tail: &str) -> Cow<'a,
for (t, is_ansi) in items {
if !is_ansi {
for g in t.graphemes(true) {
let w = g.width();
if used + w > display_width {
result.push_str(&" ".repeat(display_width.saturating_sub(used)));
let width_of_grapheme = g.width();
if used + width_of_grapheme > display_width {
// Handle case "2." mentioned in `truncate_str` docs and fill the
// hole left by double-width (2w) truncation.
if let Some(fillchar) = fill2w {
if width_of_grapheme == 2 && used < display_width {
result.push(fillchar);
} else if width_of_grapheme > 2 {
// Should not happen, this means either unicode_segmentation
// graphemes are too wide, or the unicode_width is calculated wrong.
// Fallback:
debug_assert!(width_of_grapheme <= 2, "strange grapheme width");
for _ in 0..display_width.saturating_sub(used) {
result.push(fillchar);
}
}
}
break;
}
result.push_str(g);
used += w;
used += width_of_grapheme;
}
} else {
result.push_str(t);
}
}

Cow::from(format!("{result}{result_tail}"))
result.push_str(&result_tail);
Cow::from(result)
}

/// Truncate `s` such that `tail` is present as a suffix, preceded by as much of `s` as can be
/// displayed in the requested `display_width`. ANSI escape sequences are kept but do not count
/// towards the width. If `s` already fits it is returned unchanged.
///
/// Even with `tail` empty the result may not be a prefix of `s`: when a double-width grapheme
/// has to be cut, a space is emitted in its place.
// Return string constructed as follows:
// 1. `display_width` characters are available. If the string fits, return it.
//
// 2. If a double-width (fullwidth) grapheme has to be cut in the following steps, replace the first
// half with a space (' '). If this happens the result is no longer a prefix of the input.
//
// 3. Contribute graphemes and ANSI escape sequences from `tail` until either (1) `tail` is
// exhausted, or (2) the display width of the result would exceed `display_width`.
//
// 4. If tail was exhausted, then contribute graphemes and ANSI escape sequences from `s` until the
// display_width of the result would exceed `display_width`.
pub fn truncate_str<'a>(s: &'a str, display_width: usize, tail: &str) -> Cow<'a, str> {
// Delegate to the shared implementation, using ' ' to fill the hole left by a cut
// double-width grapheme (step 2 above).
truncate_str_impl(s, display_width, tail, Some(' '))
}

/// Truncate string `s` so it fits into `display_width`, ignoring any ANSI escape sequences when
/// calculating the width. If a double-width ("fullwidth") grapheme has to be cut, it is omitted
/// (no fill character is inserted) and the resulting string is *shorter* than `display_width`.
/// This way the result is intended to always be a prefix of the input `s`. If `s` already fits
/// it is returned unchanged.
// NOTE(review): the truncation loop in `truncate_str_impl` only breaks out of the grapheme loop
// of the current text segment; ANSI escape sequences that follow the cut still appear to be
// appended to the result. The prefix guarantee therefore presumably only holds when `s`
// contains no ANSI escape sequences after the cut point -- confirm against callers that slice
// the input by the length of the returned string.
pub fn truncate_str_short(s: &str, display_width: usize) -> Cow<str> {
// No tail and no fill character: a cut double-width grapheme is simply dropped.
truncate_str_impl(s, display_width, "", None)
}

pub fn parse_style_sections(s: &str) -> Vec<(ansi_term::Style, &str)> {
Expand Down Expand Up @@ -180,12 +215,12 @@ pub fn explain_ansi(line: &str, colorful: bool) -> String {

#[cfg(test)]
mod tests {
use crate::ansi::ansi_preserving_index;
use unicode_width::UnicodeWidthStr;

// Note that src/ansi/console_tests.rs contains additional test coverage for this module.
use super::{
ansi_preserving_slice, measure_text_width, parse_first_style,
string_starts_with_ansi_style_sequence, strip_ansi_codes, truncate_str,
ansi_preserving_index, ansi_preserving_slice, measure_text_width, parse_first_style,
string_starts_with_ansi_style_sequence, strip_ansi_codes, truncate_str, truncate_str_short,
};

#[test]
Expand Down Expand Up @@ -292,4 +327,39 @@ mod tests {
assert_eq!(truncate_str("123", 2, "→"), "1→");
assert_eq!(truncate_str("12ݶ", 1, "ݶ"), "ݶ");
}

#[test]
fn test_truncate_str_at_double_width_grapheme() {
    // The fixtures must contain real double-width graphemes, otherwise the width
    // assertions below fail and the truncation edge cases are not exercised at all.
    // They are spelled as escapes so they survive any ASCII normalization of this file:
    //   U+FF03 FULLWIDTH NUMBER SIGN, U+FF0F FULLWIDTH SOLIDUS (each two columns wide).
    let one_double_four = "1\u{FF03}4";
    let double = "\u{FF0F}";
    assert_eq!(one_double_four.width(), 4);
    assert_eq!(double.width(), 2);

    // `truncate_str`: a cut double-width grapheme is replaced by a single space.
    assert_eq!(truncate_str(one_double_four, 1, ""), "1");
    assert_eq!(truncate_str(one_double_four, 2, ""), "1 ");
    assert_eq!(truncate_str(one_double_four, 3, ""), "1\u{FF03}");
    assert_eq!(truncate_str(one_double_four, 4, ""), "1\u{FF03}4");

    // `truncate_str_short`: a cut double-width grapheme is dropped, the result is shorter.
    assert_eq!(truncate_str_short(one_double_four, 1), "1");
    assert_eq!(truncate_str_short(one_double_four, 2), "1"); // !!
    assert_eq!(truncate_str_short(one_double_four, 3), "1\u{FF03}");
    assert_eq!(truncate_str_short(one_double_four, 4), "1\u{FF03}4");

    // A double-width tail is itself subject to truncation.
    assert_eq!(truncate_str(one_double_four, 1, double), " ");
    assert_eq!(truncate_str(one_double_four, 2, double), "\u{FF0F}");
    assert_eq!(truncate_str(one_double_four, 3, double), "1\u{FF0F}");
    assert_eq!(truncate_str(one_double_four, 4, double), "1\u{FF03}4");

    // Zero width never yields any output.
    assert_eq!(truncate_str(one_double_four, 0, ""), "");
    assert_eq!(truncate_str(one_double_four, 0, double), "");
    assert_eq!(truncate_str_short(one_double_four, 0), "");

    // Input consisting of a single double-width grapheme.
    assert_eq!(truncate_str(double, 0, double), "");
    assert_eq!(truncate_str(double, 1, double), " ");
    assert_eq!(truncate_str(double, 2, double), double);

    assert_eq!(truncate_str_short(double, 0), "");
    assert_eq!(truncate_str_short(double, 1), "");
    assert_eq!(truncate_str_short(double, 2), double);
}
}
18 changes: 14 additions & 4 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -734,12 +734,22 @@ pub struct Opt {
/// insertion operations transforming one into the other.
pub max_line_distance: f64,

#[arg(long = "max-line-length", default_value = "512", value_name = "N")]
#[arg(
long = "max-syntax-highlighting-length",
default_value = "400",
value_name = "N"
)]
/// Stop syntax highlighting lines after this many characters.
///
/// To always highlight entire lines, set to zero - but note that delta will be slow on very
/// long lines (e.g. minified .js).
pub max_syntax_length: usize,

#[arg(long = "max-line-length", default_value = "3000", value_name = "N")]
/// Truncate lines longer than this.
///
/// To prevent any truncation, set to zero. Note that delta will be slow on very long lines
/// (e.g. minified .js) if truncation is disabled. When wrapping lines it is automatically set
/// to fit at least all visible characters.
/// To prevent any truncation, set to zero. When wrapping lines this does nothing as it is
/// overwritten to fit at least all visible characters, see `--wrap-max-lines`.
pub max_line_length: usize,

#[arg(
Expand Down
2 changes: 2 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ pub struct Config {
pub max_line_distance_for_naively_paired_lines: f64,
pub max_line_distance: f64,
pub max_line_length: usize,
pub max_syntax_length: usize,
pub merge_conflict_begin_symbol: String,
pub merge_conflict_ours_diff_header_style: Style,
pub merge_conflict_theirs_diff_header_style: Style,
Expand Down Expand Up @@ -392,6 +393,7 @@ impl From<cli::Opt> for Config {
} else {
opt.max_line_length
},
max_syntax_length: opt.max_syntax_length,
merge_conflict_begin_symbol: opt.merge_conflict_begin_symbol,
merge_conflict_ours_diff_header_style: styles["merge-conflict-ours-diff-header-style"],
merge_conflict_theirs_diff_header_style: styles
Expand Down
1 change: 1 addition & 0 deletions src/options/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ pub fn set_options(
map_styles,
max_line_distance,
max_line_length,
max_syntax_length,
// Hack: minus-style must come before minus-*emph-style because the latter default
// dynamically to the value of the former.
merge_conflict_begin_symbol,
Expand Down
39 changes: 34 additions & 5 deletions src/paint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -696,11 +696,40 @@ pub fn get_syntax_style_sections_for_lines<'a>(
) {
(Some(highlighter), true) => {
for (line, _) in lines.iter() {
line_sections.push(
highlighter
.highlight_line(line, &config.syntax_set)
.unwrap(),
);
// Fast but simple length comparison. Overcounts non-printable ansi
// characters or wider UTF-8, but `truncate_str_short` in the
// else branch corrects that.
if line.len() < config.max_syntax_length || config.max_syntax_length == 0 {
line_sections.push(
highlighter
.highlight_line(line, &config.syntax_set)
.unwrap(),
);
} else {
let line_syntax = ansi::truncate_str_short(line, config.max_syntax_length);
// Re-split to get references into `line` with correct lifetimes.
// SAFETY: slicing the string is safe because `truncate_str_short` always
// returns a prefix of the input and only cuts at grapheme borders.
let (with_syntax, plain) = line.split_at(line_syntax.len());
// Note: splitting a line and only feeding one half to the highlighter may
// result in wrong highlighting until it is reset at the next hunk.
//
// Also, as lines are no longer newline terminated they might not be
// highlighted correctly, and because of lifetimes inserting '\n' here is not
// possible, also see `prepare()`.
line_sections.push(
highlighter
.highlight_line(with_syntax, &config.syntax_set)
.unwrap(),
);

if !plain.is_empty() {
line_sections
.last_mut()
.unwrap()
.push((config.null_syntect_style, plain));
}
}
}
}
_ => {
Expand Down
Loading
Loading