diff --git a/src/ansi/mod.rs b/src/ansi/mod.rs index a0f7f9733..856c39022 100644 --- a/src/ansi/mod.rs +++ b/src/ansi/mod.rs @@ -25,24 +25,19 @@ pub fn measure_text_width(s: &str) -> usize { }) } -/// Truncate string such that `tail` is present as a suffix, preceded by as much of `s` as can be -/// displayed in the requested width. -// Return string constructed as follows: -// 1. `display_width` characters are available. If the string fits, return it. -// -// 2. Contribute graphemes and ANSI escape sequences from `tail` until either (1) `tail` is -// exhausted, or (2) the display width of the result would exceed `display_width`. -// -// 3. If tail was exhausted, then contribute graphemes and ANSI escape sequences from `s` until the -// display_width of the result would exceed `display_width`. -pub fn truncate_str<'a>(s: &'a str, display_width: usize, tail: &str) -> Cow<'a, str> { +fn truncate_str_impl<'a>( + s: &'a str, + display_width: usize, + tail: &str, + fill2w: Option, +) -> Cow<'a, str> { let items = ansi_strings_iterator(s).collect::>(); let width = strip_ansi_codes_from_strings_iterator(items.iter().copied()).width(); if width <= display_width { return Cow::from(s); } let result_tail = if !tail.is_empty() { - truncate_str(tail, display_width, "").to_string() + truncate_str_impl(tail, display_width, "", fill2w).to_string() } else { String::new() }; @@ -51,20 +46,60 @@ pub fn truncate_str<'a>(s: &'a str, display_width: usize, tail: &str) -> Cow<'a, for (t, is_ansi) in items { if !is_ansi { for g in t.graphemes(true) { - let w = g.width(); - if used + w > display_width { - result.push_str(&" ".repeat(display_width.saturating_sub(used))); + let width_of_grapheme = g.width(); + if used + width_of_grapheme > display_width { + // Handle case "2." mentioned in `truncate_str` docs and fill the + // hole left by double-width (2w) truncation. + if let Some(fillchar) = fill2w { + if width_of_grapheme == 2 && used < display_width { + result.push(fillchar); + } else if width_of_grapheme > 2 { + // Should not happen, this means either unicode_segmentation + // graphemes are too wide, or the unicode_width is calculated wrong. + // Fallback: + debug_assert!(width_of_grapheme <= 2, "strange grapheme width"); + for _ in 0..display_width.saturating_sub(used) { + result.push(fillchar); + } + } + } break; } result.push_str(g); - used += w; + used += width_of_grapheme; } } else { result.push_str(t); } } - Cow::from(format!("{result}{result_tail}")) + result.push_str(&result_tail); + Cow::from(result) +} + +/// Truncate string such that `tail` is present as a suffix, preceded by as much of `s` as can be +/// displayed in the requested width. Even with `tail` empty the result may not be a prefix of `s`. +// Return string constructed as follows: +// 1. `display_width` characters are available. If the string fits, return it. +// +// 2. If a double-width (fullwidth) grapheme has to be cut in the following steps, replace the first +// half with a space (' '). If this happens the result is no longer a prefix of the input. +// +// 3. Contribute graphemes and ANSI escape sequences from `tail` until either (1) `tail` is +// exhausted, or (2) the display width of the result would exceed `display_width`. +// +// 4. If tail was exhausted, then contribute graphemes and ANSI escape sequences from `s` until the +// display_width of the result would exceed `display_width`. +pub fn truncate_str<'a>(s: &'a str, display_width: usize, tail: &str) -> Cow<'a, str> { + truncate_str_impl(s, display_width, tail, Some(' ')) +} + +/// Truncate string `s` so it fits into `display_width`, ignoring any ANSI escape sequences when +/// calculating the width. If a double-width ("fullwidth") grapheme has to be cut, it is omitted and +/// the resulting string is *shorter* than `display_width`. But this way the result is always a +/// prefix of the input `s`. +pub fn truncate_str_short(s: &str, display_width: usize) -> Cow { + truncate_str_impl(s, display_width, "", None) } pub fn parse_style_sections(s: &str) -> Vec<(ansi_term::Style, &str)> { @@ -180,12 +215,12 @@ pub fn explain_ansi(line: &str, colorful: bool) -> String { #[cfg(test)] mod tests { - use crate::ansi::ansi_preserving_index; + use unicode_width::UnicodeWidthStr; // Note that src/ansi/console_tests.rs contains additional test coverage for this module. use super::{ - ansi_preserving_slice, measure_text_width, parse_first_style, - string_starts_with_ansi_style_sequence, strip_ansi_codes, truncate_str, + ansi_preserving_index, ansi_preserving_slice, measure_text_width, parse_first_style, + string_starts_with_ansi_style_sequence, strip_ansi_codes, truncate_str, truncate_str_short, }; #[test] @@ -292,4 +327,39 @@ mod tests { assert_eq!(truncate_str("123", 2, "→"), "1→"); assert_eq!(truncate_str("12ݶ", 1, "ݶ"), "ݶ"); } + + #[test] + fn test_truncate_str_at_double_width_grapheme() { + let one_double_four = "1#4"; + let double = "/"; + assert_eq!(one_double_four.width(), 4); + assert_eq!(double.width(), 2); + + assert_eq!(truncate_str(one_double_four, 1, ""), "1"); + assert_eq!(truncate_str(one_double_four, 2, ""), "1 "); + assert_eq!(truncate_str(one_double_four, 3, ""), "1#"); + assert_eq!(truncate_str(one_double_four, 4, ""), "1#4"); + + assert_eq!(truncate_str_short(one_double_four, 1), "1"); + assert_eq!(truncate_str_short(one_double_four, 2), "1"); // !! + assert_eq!(truncate_str_short(one_double_four, 3), "1#"); + assert_eq!(truncate_str_short(one_double_four, 4), "1#4"); + + assert_eq!(truncate_str(one_double_four, 1, double), " "); + assert_eq!(truncate_str(one_double_four, 2, double), "/"); + assert_eq!(truncate_str(one_double_four, 3, double), "1/"); + assert_eq!(truncate_str(one_double_four, 4, double), "1#4"); + + assert_eq!(truncate_str(one_double_four, 0, ""), ""); + assert_eq!(truncate_str(one_double_four, 0, double), ""); + assert_eq!(truncate_str_short(one_double_four, 0), ""); + + assert_eq!(truncate_str(double, 0, double), ""); + assert_eq!(truncate_str(double, 1, double), " "); + assert_eq!(truncate_str(double, 2, double), double); + + assert_eq!(truncate_str_short(double, 0), ""); + assert_eq!(truncate_str_short(double, 1), ""); + assert_eq!(truncate_str_short(double, 2), double); + } } diff --git a/src/cli.rs b/src/cli.rs index a21918f72..28d196280 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -734,12 +734,22 @@ pub struct Opt { /// insertion operations transforming one into the other. pub max_line_distance: f64, - #[arg(long = "max-line-length", default_value = "512", value_name = "N")] + #[arg( + long = "max-syntax-highlighting-length", + default_value = "400", + value_name = "N" + )] + /// Stop syntax highlighting lines after this many characters. + /// + /// To always highlight entire lines, set to zero - but note that delta will be slow on very + /// long lines (e.g. minified .js). + pub max_syntax_length: usize, + + #[arg(long = "max-line-length", default_value = "3000", value_name = "N")] /// Truncate lines longer than this. /// - /// To prevent any truncation, set to zero. Note that delta will be slow on very long lines - /// (e.g. minified .js) if truncation is disabled. When wrapping lines it is automatically set - /// to fit at least all visible characters. + /// To prevent any truncation, set to zero. When wrapping lines this does nothing as it is + /// overwritten to fit at least all visible characters, see `--wrap-max-lines`. pub max_line_length: usize, #[arg( diff --git a/src/config.rs b/src/config.rs index 351d57d4c..c2fbccb73 100644 --- a/src/config.rs +++ b/src/config.rs @@ -101,6 +101,7 @@ pub struct Config { pub max_line_distance_for_naively_paired_lines: f64, pub max_line_distance: f64, pub max_line_length: usize, + pub max_syntax_length: usize, pub merge_conflict_begin_symbol: String, pub merge_conflict_ours_diff_header_style: Style, pub merge_conflict_theirs_diff_header_style: Style, @@ -392,6 +393,7 @@ impl From for Config { } else { opt.max_line_length }, + max_syntax_length: opt.max_syntax_length, merge_conflict_begin_symbol: opt.merge_conflict_begin_symbol, merge_conflict_ours_diff_header_style: styles["merge-conflict-ours-diff-header-style"], merge_conflict_theirs_diff_header_style: styles diff --git a/src/options/set.rs b/src/options/set.rs index 8677d158f..2fae345bf 100644 --- a/src/options/set.rs +++ b/src/options/set.rs @@ -174,6 +174,7 @@ pub fn set_options( map_styles, max_line_distance, max_line_length, + max_syntax_length, // Hack: minus-style must come before minus-*emph-style because the latter default // dynamically to the value of the former. merge_conflict_begin_symbol, diff --git a/src/paint.rs b/src/paint.rs index 4b487ea38..aaa53e50a 100644 --- a/src/paint.rs +++ b/src/paint.rs @@ -696,11 +696,40 @@ pub fn get_syntax_style_sections_for_lines<'a>( ) { (Some(highlighter), true) => { for (line, _) in lines.iter() { - line_sections.push( - highlighter - .highlight_line(line, &config.syntax_set) - .unwrap(), - ); + // Fast but simple length comparison. Overcounts non-printable ansi + // characters or wider UTF-8, but `truncate_str_short` in the + // else branch corrects that. + if line.len() < config.max_syntax_length || config.max_syntax_length == 0 { + line_sections.push( + highlighter + .highlight_line(line, &config.syntax_set) + .unwrap(), + ); + } else { + let line_syntax = ansi::truncate_str_short(line, config.max_syntax_length); + // Re-split to get references into `line` with correct lifetimes. + // SAFETY: slicing the string is safe because `truncate_str_short` always + // returns a prefix of the input and only cuts at grapheme borders. + let (with_syntax, plain) = line.split_at(line_syntax.len()); + // Note: splitting a line and only feeding one half to the highlighter may + // result in wrong highlighting until it is reset the next hunk. + // + // Also, as lines are no longer newline terminated they might not be + // highlighted correctly, and because of lifetimes inserting '\n' here is not + // possible, also see `prepare()`. + line_sections.push( + highlighter + .highlight_line(with_syntax, &config.syntax_set) + .unwrap(), + ); + + if !plain.is_empty() { + line_sections + .last_mut() + .unwrap() + .push((config.null_syntect_style, plain)); + } + } } } _ => { diff --git a/src/tests/test_example_diffs.rs b/src/tests/test_example_diffs.rs index 9ee3ca5bb..c91c8bd52 100644 --- a/src/tests/test_example_diffs.rs +++ b/src/tests/test_example_diffs.rs @@ -1994,6 +1994,149 @@ src/align.rs:71: impl<'a> Alignment<'a> { │ .expect_after_header("#partial\n\nremoved: a"); } + #[test] + fn test_lines_with_syntax_width_limit() { + let result = DeltaTest::with_args(&[ + "--max-line-length=42", + "--max-syntax-highlighting-length=18", + ]) + .explain_ansi() + .with_input(GIT_DIFF_SINGLE_HUNK); + assert_snapshot!(result.output, @r###" + (normal)commit 94907c0f136f46dc46ffae2dc92dca9af7(reverse normal)→(normal) + Author: Dan Davison (149)Alignmen(normal)t<'a> { (blue)│(normal) + (blue)─────────────────────────────(blue)┘(normal) + + (231) (203)for(231) (i, x_(normal)i) in self.x.iter().en→ + (231) (203)for(231) (j(normal), y_j) in self.y.iter(→ + (normal 52) let (left, diag, up) =(normal 124) ((normal) + (normal 52) self.index(i, j + 1(normal 124)),(normal) + (normal 52) self.index(i, j),(normal) + (normal 52) self.index(i + 1, j),(normal) + (normal 52) );(normal) + (231 22) le(normal 22)t (left, diag, up) =(normal) + (231 22) (normal 22) (normal 28)((normal 22)self.index(i, j + 1(normal 28)→(normal) + (231) le(normal)t candidates = [ + (231) (normal) Cell { + (231) (normal) parent: left, + "###); + } + + #[test] + fn test_lines_with_syntax_width_limit_wrapping() { + let result = DeltaTest::with_args(&[ + "--side-by-side", + "--width=55", + "--wrap-max-lines=1", + "--max-line-length=10", // this gets ignored! + "--max-syntax-highlighting-length=22", + ]) + .explain_ansi() + .with_input(GIT_DIFF_SINGLE_HUNK); + + // eprintln!("{}", result.raw_output); + assert_snapshot!(result.output, @r###" + (normal)commit 94907c0f136f46dc46ffae2dc92dca9af7eb7c2e + Author: Dan Davison + Date: Thu May 14 11:13:17 2020 -0400 + + rustfmt + + + (blue)src/align.rs(normal) + (blue)───────────────────────────────────────────────────────(normal) + + (blue)─────────────────────────────(blue)┐(normal) + (blue)71(normal):(231) (81)impl(231)<(203)'a(231)> (149)Alignment(231)<(203)'a(normal)> { (blue)│(normal) + (blue)─────────────────────────────(blue)┘(normal) + + (blue)│(238) 71 (blue)│(231) (203)for(231) (i, x_i)(blue)↵(blue) │(238) 71 (blue)│(231) (203)for(231) (i, x_i)(blue)↵(normal) + (blue)│(238) (blue)│(231) i(normal)n self.x.iter().en(reverse normal)→(blue) │(238) (blue)│(231) i(normal)n self.x.iter().en(reverse normal)→(normal) + (blue)│(238) 72 (blue)│(231) (203)for(231) (j, (blue)↵(blue) │(238) 72 (blue)│(231) (203)for(231) (j, (blue)↵(normal) + (blue)│(238) (blue)│(231)y_(normal)j) in self.y.iter((reverse normal)→(blue) │(238) (blue)│(231)y_(normal)j) in self.y.iter((reverse normal)→(normal) + (blue)│(88) 73 (blue)│(231 52) (81)let(231) (blue)↵(blue) │(28) 73 (blue)│(231 22) (81)let(231) (blue)↵(normal) + (blue)│(88) (blue)│(231 52)(l(normal 52)eft, diag, up) =(normal 124) ((normal 52) (blue) │(28) (blue)│(231 22)(l(normal 22)eft, diag, up) =(normal) + (blue)│(88) 74 (blue)│(231 52) (blue)↵(blue) │(28) 74 (blue)│(231 22) (blue)↵(normal) + (blue)│(88) (blue)│(231 52)se(normal 52)lf.index(i, j + 1),(blue) │(28) (blue)│(231 28)((normal 22)s(normal 22)elf.index(i, j + 1(reverse normal)→(normal) + (blue)│(88) 75 (blue)│(231 52) (blue)↵(blue) │(28) (blue)│(normal) + (blue)│(88) (blue)│(231 52)se(normal 52)lf.index(i, j),(normal 52) (blue) │(28) (blue)│(normal) + (blue)│(88) 76 (blue)│(231 52) (blue)↵(blue) │(28) (blue)│(normal) + (blue)│(88) (blue)│(231 52)se(normal 52)lf.index(i + 1, j),(blue) │(28) (blue)│(normal) + (blue)│(88) 77 (blue)│(231 52) );(normal 52) (blue) │(28) (blue)│(normal) + (blue)│(238) 78 (blue)│(231) (81)let(231) (blue)↵(blue) │(238) 75 (blue)│(231) (81)let(231) (blue)↵(normal) + (blue)│(238) (blue)│(231)ca(normal)ndidates = [ (blue) │(238) (blue)│(231)ca(normal)ndidates = [ + (blue)│(238) 79 (blue)│(231) (blue)↵(blue) │(238) 76 (blue)│(231) (blue)↵(normal) + (blue)│(238) (blue)│(231)Ce(normal)ll { (blue) │(238) (blue)│(231)Ce(normal)ll { + (blue)│(238) 80 (blue)│(231) (blue)↵(blue) │(238) 77 (blue)│(231) (blue)↵(normal) + (blue)│(238) (blue)│(231) (normal) parent: left, (blue) │(238) (blue)│(231) (normal) parent: left, + "###); + } + + #[test] + fn test_lines_with_syntax_width_unicode() { + let result = DeltaTest::with_args(&["--max-syntax-highlighting-length=11"]) + .explain_ansi() + .with_input(GIT_DIFF_ALL_UNICODE_W_FULLWIDTH); + + assert_snapshot!(result.output, @r###" + (normal) + + (blue)src/a(normal) + (blue)───────────────────────────────────────────(normal) + + (blue)───(blue)┐(normal) + (blue)1(normal): (blue)│(normal) + (blue)───(blue)┘(normal) + (231)一æäöø€ÆÄÖ(normal)〇Øß一 + (231)一æäöø€ÆÄÖ(normal)〇Øß一 + (normal 52)二æäöø(normal 124)¢(normal 52)ÆÄÖ〇Øß二(normal) + (normal 52)二æäöø(normal 124)¢(normal 52)ÆÄÖ〇Øß二(normal) + (231 22)二æäöø(normal 28)€(normal 22)ÆÄÖ(normal 22)〇Øß二(normal) + (231 22)二æäöø(normal 28)€(normal 22)ÆÄÖ(normal 22)〇Øß二(normal) + (231)三æäöø€ÆÄÖ(normal)〇Øß三 + (231)三æäöø€ÆÄÖ(normal)〇Øß三 + (231)¶(normal) + "###); + + let result = DeltaTest::with_args(&[ + "--max-syntax-highlighting-length=10", + "--max-line-length=16", + ]) + .explain_ansi() + .with_input(GIT_DIFF_ALL_UNICODE_W_FULLWIDTH); + + // eprintln!("{}", result.raw_output); + assert_snapshot!(result.output, @r###" + (normal) + + (blue)src/a(normal) + (blue)───────────────────────────────────────────(normal) + + (blue)───(blue)┐(normal) + (blue)1(normal): (blue)│(normal) + (blue)───(blue)┘(normal) + (231)一æäöø€ÆÄÖ(normal)〇Øß→ + (231)一æäöø€ÆÄÖ(normal)〇Øß→ + (normal 52)二æäöø(normal 124)¢(normal 52)ÆÄÖ〇Øß→(normal) + (normal 52)二æäöø(normal 124)¢(normal 52)ÆÄÖ〇Øß→(normal) + (231 22)二æäöø(normal 28)€(normal 22)ÆÄÖ(normal 22)〇Øß→(normal) + (231 22)二æäöø(normal 28)€(normal 22)ÆÄÖ(normal 22)〇Øß→(normal) + (231)三æäöø€ÆÄÖ(normal)〇Øß→ + (231)三æäöø€ÆÄÖ(normal)〇Øß→ + (231)¶(normal) + "###); + } + const GIT_DIFF_SINGLE_HUNK: &str = "\ commit 94907c0f136f46dc46ffae2dc92dca9af7eb7c2e Author: Dan Davison @@ -2964,5 +3107,22 @@ Date: Tue Jun 21 14:48:20 2022 +0200 diff --git a a new file mode 100644 index 0000000..e69de29 +"; + + const GIT_DIFF_ALL_UNICODE_W_FULLWIDTH: &str = " +diff --git a/src/a b/src/a +index 53f98b6..14d6caa 100644 +--- a/src/a ++++ b/src/a +@@ -1,7 +1,7 @@ + 一æäöø€ÆÄÖ〇Øß一 + 一æäöø€ÆÄÖ〇Øß一 +-二æäöø¢ÆÄÖ〇Øß二 +-二æäöø¢ÆÄÖ〇Øß二 ++二æäöø€ÆÄÖ〇Øß二 ++二æäöø€ÆÄÖ〇Øß二 + 三æäöø€ÆÄÖ〇Øß三 + 三æäöø€ÆÄÖ〇Øß三 + ¶ "; }