Skip to content

Commit

Permalink
fix(color-contrast): ignore format unicode characters (#4102)
Browse files Browse the repository at this point in the history
* fix(color-contrast): ignore format unicode characters

* comment

* typo

* Apply suggestions from code review

Co-authored-by: Wilco Fiers <[email protected]>

---------

Co-authored-by: Wilco Fiers <[email protected]>
  • Loading branch information
straker and WilcoFiers authored Jul 26, 2023
1 parent c6e07be commit 049522e
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 76 deletions.
18 changes: 11 additions & 7 deletions lib/commons/text/has-unicode.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import {
getUnicodeNonBmpRegExp,
getSupplementaryPrivateUseRegExp,
getPunctuationRegExp
getPunctuationRegExp,
getCategoryFormatRegExp
} from './unicode';
import emojiRegexText from 'emoji-regex';

Expand All @@ -20,19 +21,22 @@ import emojiRegexText from 'emoji-regex';
*/
function hasUnicode(str, options) {
  const { emoji, nonBmp, punctuations } = options;
  // Accumulate the result across every enabled option instead of returning
  // from the first enabled one; otherwise a negative emoji check would
  // prevent the nonBmp / punctuation checks from ever running.
  let value = false;

  if (emoji) {
    value ||= emojiRegexText().test(str);
  }
  if (nonBmp) {
    // Format-category characters are tested under the `nonBmp` option,
    // alongside the non-BMP and supplementary private-use checks.
    value ||=
      getUnicodeNonBmpRegExp().test(str) ||
      getSupplementaryPrivateUseRegExp().test(str) ||
      getCategoryFormatRegExp().test(str);
  }
  if (punctuations) {
    value ||= getPunctuationRegExp().test(str);
  }

  // `false` when no option is enabled or none of the enabled checks matched.
  return value;
}

export default hasUnicode;
9 changes: 6 additions & 3 deletions lib/commons/text/remove-unicode.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import {
getUnicodeNonBmpRegExp,
getSupplementaryPrivateUseRegExp,
getPunctuationRegExp
getPunctuationRegExp,
getCategoryFormatRegExp
} from './unicode.js';
import emojiRegexText from 'emoji-regex';

Expand All @@ -25,8 +26,10 @@ function removeUnicode(str, options) {
str = str.replace(emojiRegexText(), '');
}
if (nonBmp) {
str = str.replace(getUnicodeNonBmpRegExp(), '');
str = str.replace(getSupplementaryPrivateUseRegExp(), '');
str = str
.replace(getUnicodeNonBmpRegExp(), '')
.replace(getSupplementaryPrivateUseRegExp(), '')
.replace(getCategoryFormatRegExp(), '');
}
if (punctuations) {
str = str.replace(getPunctuationRegExp(), '');
Expand Down
12 changes: 12 additions & 0 deletions lib/commons/text/unicode.js
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,15 @@ export function getSupplementaryPrivateUseRegExp() {
// ┏━━━━━━┻━━━━━━┓┏━━━━━━┻━━━━━━┓
return /[\uDB80-\uDBBF][\uDC00-\uDFFF]/g;
}

/**
 * Get regular expression for the Unicode "Format" general category (Cf),
 * e.g. soft hyphen (U+00AD), zero width space (U+200B) and the
 * bidirectional control characters.
 * When we drop IE11 we can instead use the unicode property escape `/\p{Cf}/gu`
 * Reference:
 * - https://www.compart.com/en/unicode/category/Cf
 *
 * Supplementary-plane characters are written as explicit surrogate pairs
 * because the expression does not use the `u` flag. A fresh RegExp object is
 * returned on every call, so the `lastIndex` state of the global flag is
 * never shared between callers.
 *
 * @returns {RegExp}
 */
export function getCategoryFormatRegExp() {
  // Includes U+0890-U+0891 and U+13430-U+1343F (added to Cf in Unicode 14/15).
  return /[\xAD\u0600-\u0605\u061C\u06DD\u070F\u0890\u0891\u08E2\u180E\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u206F\uFEFF\uFFF9-\uFFFB]|\uD804[\uDCBD\uDCCD]|\uD80D[\uDC30-\uDC3F]|\uD82F[\uDCA0-\uDCA3]|\uD834[\uDD73-\uDD7A]|\uDB40[\uDC01\uDC20-\uDC7F]/g;
}
Loading

0 comments on commit 049522e

Please sign in to comment.