Skip to content

Commit

Permalink
Streamlined RTL text detection
Browse files Browse the repository at this point in the history
  • Loading branch information
1ec5 committed Aug 22, 2024
1 parent 5209ec8 commit 91e523c
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 23 deletions.
24 changes: 12 additions & 12 deletions src/util/script_detection.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {charAllowsIdeographicBreaking, charAllowsLetterSpacing, charHasUprightVerticalOrientation, charInComplexShapingScript, charInRTLScript} from './script_detection';
import {charAllowsIdeographicBreaking, charAllowsLetterSpacing, charHasUprightVerticalOrientation, charInComplexShapingScript, stringContainsRTLText} from './script_detection';

describe('charAllowsIdeographicBreaking', () => {
test('disallows ideographic breaking of Latin text', () => {
Expand Down Expand Up @@ -107,35 +107,35 @@ describe('charInComplexShapingScript', () => {
});
});

describe('charInRTLScript', () => {
describe('stringContainsRTLText', () => {
test('does not identify direction-neutral text as right-to-left', () => {
expect(charInRTLScript('3'.codePointAt(0))).toBe(false);
expect(stringContainsRTLText('3')).toBe(false);
});

test('identifies Arabic text as right-to-left', () => {
// Arabic
expect(charInRTLScript('۳'.codePointAt(0))).toBe(true);
expect(stringContainsRTLText('۳')).toBe(true);
// Arabic Supplement
expect(charInRTLScript('ݣ'.codePointAt(0))).toBe(true);
expect(stringContainsRTLText('ݣ')).toBe(true);
// Arabic Extended-A
expect(charInRTLScript('ࢳ'.codePointAt(0))).toBe(true);
expect(stringContainsRTLText('ࢳ')).toBe(true);
// Arabic Extended-B
expect(charInRTLScript('࢐'.codePointAt(0))).toBe(true);
expect(stringContainsRTLText('࢐')).toBe(true);
// Arabic Presentation Forms-A
expect(charInRTLScript('ﰤ'.codePointAt(0))).toBe(true);
expect(stringContainsRTLText('ﰤ')).toBe(true);
// Arabic Presentation Forms-B
expect(charInRTLScript('ﺽ'.codePointAt(0))).toBe(true);
expect(stringContainsRTLText('ﺽ')).toBe(true);
});

test('identifies Hebrew text as right-to-left', () => {
// Hebrew
expect(charInRTLScript('ה'.codePointAt(0))).toBe(true);
expect(stringContainsRTLText('ה')).toBe(true);
// Alphabetic Presentation Forms
expect(charInRTLScript('ﬡ'.codePointAt(0))).toBe(true);
expect(stringContainsRTLText('ﬡ')).toBe(true);
});

test('identifies Thaana text as right-to-left', () => {
// Thaana
expect(charInRTLScript('ޘ'.codePointAt(0))).toBe(true);
expect(stringContainsRTLText('ޘ')).toBe(true);
});
});
13 changes: 2 additions & 11 deletions src/util/script_detection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -389,10 +389,6 @@ const rtlScriptCodes = [

const rtlScriptRegExp = sanitizedRegExpFromScriptCodes(rtlScriptCodes);

export function charInRTLScript(char: number) {
return rtlScriptRegExp.test(String.fromCodePoint(char));
}

export function charInSupportedScript(char: number, canRenderRTL: boolean) {
// This is a rough heuristic: whether we "can render" a script
// actually depends on the properties of the font being used
Expand All @@ -401,7 +397,7 @@ export function charInSupportedScript(char: number, canRenderRTL: boolean) {

// Even in Latin script, we "can't render" combinations such as the fi
// ligature, but we don't consider that semantically significant.
if (!canRenderRTL && charInRTLScript(char)) {
if (!canRenderRTL && rtlScriptRegExp.test(String.fromCodePoint(char))) {
return false;
}
if ((char >= 0x0900 && char <= 0x0DFF) ||
Expand All @@ -419,12 +415,7 @@ export function charInSupportedScript(char: number, canRenderRTL: boolean) {
}

export function stringContainsRTLText(chars: string): boolean {
for (const char of chars) {
if (charInRTLScript(char.codePointAt(0))) {
return true;
}
}
return false;
return rtlScriptRegExp.test(chars);
}

export function isStringInSupportedScript(chars: string, canRenderRTL: boolean) {
Expand Down

0 comments on commit 91e523c

Please sign in to comment.