From 99b6987477879211b48c1e1ba3988a571f3a5e23 Mon Sep 17 00:00:00 2001 From: Jan Lelis Date: Wed, 4 Jan 2023 12:13:03 +0100 Subject: [PATCH] Avoid overwrite lookup if no overwrites are set --- CHANGELOG.md | 3 +++ lib/unicode/display_width.rb | 48 +++++++++++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7e2f7d..2b01f9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,10 @@ ## 2.4.2 (unreleased) +More performance improvements: + - Optimize lookup of first 4096 codepoints +- Avoid overwrite lookup if no overwrites are set ## 2.4.1 diff --git a/lib/unicode/display_width.rb b/lib/unicode/display_width.rb index 77816e6..bbd7026 100644 --- a/lib/unicode/display_width.rb +++ b/lib/unicode/display_width.rb @@ -10,16 +10,51 @@ class DisplayWidth FIRST_4096 = decompress_index(INDEX[0][0], 1) def self.of(string, ambiguous = 1, overwrite = {}, options = {}) - # Optimization for ASCII-only strings without certain control symbols - if overwrite.empty? && string.ascii_only? - if string.match?(ASCII_NON_ZERO_REGEX) - res = string.gsub(ASCII_NON_ZERO_REGEX, "").size - string.count("\b") - return res < 0 ? 0 : res + if overwrite.empty? + # Optimization for ASCII-only strings without certain control symbols + if string.ascii_only? + if string.match?(ASCII_NON_ZERO_REGEX) + res = string.gsub(ASCII_NON_ZERO_REGEX, "").size - string.count("\b") + res < 0 ? 0 : res + else + string.size + end else - return string.size + width_no_overwrite(string, ambiguous, options) end + else + width_all_features(string, ambiguous, overwrite, options) end + end + def self.width_no_overwrite(string, ambiguous, options = {}) + # Sum of all chars widths + res = string.codepoints.sum{ |codepoint| + if codepoint > 15 && codepoint < 161 # very common + next 1 + elsif codepoint < 0x1001 + width = FIRST_4096[codepoint] + else + width = INDEX + depth = INITIAL_DEPTH + while (width = width[codepoint / depth]).instance_of? 
Array + codepoint %= depth + depth /= 16 + end + end + + width == :A ? ambiguous : (width || 1) + } + + # Subtract emoji error + res -= emoji_extra_width_of(string, ambiguous) if options[:emoji] + + # Return result + prevent negative lengths + res < 0 ? 0 : res + end + + # Same as .width_no_overwrite - but with applying overwrites for each char + def self.width_all_features(string, ambiguous, overwrite, options) # Sum of all chars widths res = string.codepoints.sum{ |codepoint| next overwrite[codepoint] if overwrite[codepoint] @@ -47,6 +82,7 @@ def self.of(string, ambiguous = 1, overwrite = {}, options = {}) res < 0 ? 0 : res end + def self.emoji_extra_width_of(string, ambiguous = 1, overwrite = {}, _ = {}) require "unicode/emoji"