Fixed Mon–Burmese combining diacritics

If a grapheme cluster begins with a spacing combining mark (Unicode General_Category Mc), merge it into the preceding grapheme cluster so that the diacritic is not drawn over a dotted circle.
1ec5 · Aug 19, 2024 · 7d50181 · 7d50181
1 parent 85c5cca
commit 7d50181
Show file tree

Hide file tree

Showing 3 changed files with 52 additions and 30 deletions.
diff --git a/src/data/bucket/symbol_bucket.ts b/src/data/bucket/symbol_bucket.ts
@@ -23,7 +23,7 @@ import {ProgramConfigurationSet} from '../program_configuration';
 import {TriangleIndexArray, LineIndexArray} from '../index_array_type';
 import {transformText} from '../../symbol/transform_text';
 import {mergeLines} from '../../symbol/merge_lines';
-import {allowsVerticalWritingMode, stringContainsRTLText} from '../../util/script_detection';
+import {allowsVerticalWritingMode, splitByGraphemeCluster, stringContainsRTLText} from '../../util/script_detection';
 import {WritingMode} from '../../symbol/shaping';
 import {loadGeometry} from '../load_geometry';
 import {toEvaluationFeature} from '../evaluation_feature';
@@ -108,8 +108,6 @@ const shaderOpacityAttributes = [
     {name: 'a_fade_opacity', components: 1, type: 'Uint8' as ViewType, offset: 0}
 ];
 
-const segmenter = new Intl.Segmenter();
-
 function addVertex(
     array: StructArray,
     anchorX: number,
@@ -426,12 +424,12 @@ export class SymbolBucket implements Bucket {
         allowVerticalPlacement: boolean,
         doesAllowVerticalWritingMode: boolean) {
 
-        for (const {segment} of segmenter.segment(text)) {
-            stack[segment] = true;
+        for (const grapheme of splitByGraphemeCluster(text)) {
+            stack[grapheme] = true;
             if ((textAlongLine || allowVerticalPlacement) && doesAllowVerticalWritingMode) {
-                const verticalChar = verticalizedCharacterMap[segment];
+                const verticalChar = verticalizedCharacterMap[grapheme];
                 if (verticalChar) {
-                    stack[segment] = true;
+                    stack[grapheme] = true;
                 }
             }
         }

diff --git a/src/symbol/shaping.ts b/src/symbol/shaping.ts
@@ -1,7 +1,8 @@
 import {
     charHasUprightVerticalOrientation,
     charAllowsIdeographicBreaking,
-    charInComplexShapingScript
+    charInComplexShapingScript,
+    splitByGraphemeCluster
 } from '../util/script_detection';
 import {verticalizePunctuation} from '../util/verticalize_punctuation';
 import {rtlWorkerPlugin} from '../source/rtl_text_plugin_worker';
@@ -75,8 +76,6 @@ export type TextJustify = 'left' | 'center' | 'right';
 const PUAbegin = 0xE000;
 const PUAend = 0xF8FF;
 
-const segmenter = new Intl.Segmenter();
-
 export class SectionOptions {
     // Text options
     scale: number;
@@ -132,7 +131,7 @@ export class TaggedString {
     }
 
     length(): number {
-        return Array.from(segmenter.segment(this.text)).length;
+        return splitByGraphemeCluster(this.text).length;
     }
 
     getSection(index: number): SectionOptions {
@@ -159,7 +158,7 @@ export class TaggedString {
 
     substring(start: number, end: number): TaggedString {
         const substring = new TaggedString();
-        substring.text = Array.from(segmenter.segment(this.text)).slice(start, end).map(s => s.segment).join('');
+        substring.text = splitByGraphemeCluster(this.text).slice(start, end).join('');
         substring.sectionIndex = this.sectionIndex.slice(start, end);
         substring.sections = this.sections;
         return substring;
@@ -169,7 +168,7 @@ export class TaggedString {
      * Converts a grapheme cluster index to a UTF-16 code unit (JavaScript character index).
      */
     codeUnitIndex(unicodeIndex: number): number {
-        return Array.from(segmenter.segment(this.text)).slice(0, unicodeIndex).map(s => s.segment).join('').length;
+        return splitByGraphemeCluster(this.text).slice(0, unicodeIndex).join('').length;
     }
 
     toString(): string {
@@ -185,7 +184,7 @@ export class TaggedString {
         this.sections.push(SectionOptions.forText(section.scale, section.fontStack || defaultFontStack));
         const index = this.sections.length - 1;
         // eslint-disable-next-line @typescript-eslint/no-unused-vars
-        for (const char of segmenter.segment(section.text)) {
+        for (const char of splitByGraphemeCluster(section.text)) {
             this.sectionIndex.push(index);
         }
     }
@@ -280,7 +279,7 @@ function shapeText(
             taggedLine.text = line;
             taggedLine.sections = logicalInput.sections;
             // eslint-disable-next-line @typescript-eslint/no-unused-vars
-            for (const char of segmenter.segment(line)) {
+            for (const char of splitByGraphemeCluster(line)) {
                 taggedLine.sectionIndex.push(0);
             }
             lines.push(taggedLine);
@@ -402,9 +401,9 @@ function determineAverageLineWidth(logicalInput: TaggedString,
     let totalWidth = 0;
 
     let index = 0;
-	for (const {segment} of segmenter.segment(logicalInput.text)) {
+    for (const grapheme of splitByGraphemeCluster(logicalInput.text)) {
         const section = logicalInput.getSection(index);
-        totalWidth += getGlyphAdvance(segment, section, glyphMap, imagePositions, spacing, layoutTextSize);
+        totalWidth += getGlyphAdvance(grapheme, section, glyphMap, imagePositions, spacing, layoutTextSize);
         index++;
     }
 
@@ -524,28 +523,28 @@ export function determineLineBreaks(
     let currentX = 0;
 
     let i = 0;
-    const chars = segmenter.segment(logicalInput.text)[Symbol.iterator]();
+    const chars = splitByGraphemeCluster(logicalInput.text)[Symbol.iterator]();
     let char = chars.next();
-    const nextChars = segmenter.segment(logicalInput.text)[Symbol.iterator]();
+    const nextChars = splitByGraphemeCluster(logicalInput.text)[Symbol.iterator]();
     nextChars.next();
     let nextChar = nextChars.next();
-    const nextNextChars = segmenter.segment(logicalInput.text)[Symbol.iterator]();
+    const nextNextChars = splitByGraphemeCluster(logicalInput.text)[Symbol.iterator]();
     nextNextChars.next();
     nextNextChars.next();
     let nextNextChar = nextNextChars.next();
 
     while (!char.done) {
         const section = logicalInput.getSection(i);
-        const segment = char.value.segment;
-		const codePoint = segment.codePointAt(0);
+        const segment = char.value;
+        const codePoint = segment.codePointAt(0);
         if (!whitespace[codePoint]) currentX += getGlyphAdvance(segment, section, glyphMap, imagePositions, spacing, layoutTextSize);
 
         // Ideographic characters, spaces, and word-breaking punctuation that often appear without
         // surrounding spaces.
         if (!nextChar.done) {
             const ideographicBreak = charAllowsIdeographicBreaking(codePoint);
-            const nextSegment = nextChar.value.segment;
-			const nextCodePoint = nextSegment.codePointAt(0);
+            const nextSegment = nextChar.value;
+            const nextCodePoint = nextSegment.codePointAt(0);
             if (breakable[codePoint] || ideographicBreak || section.imageName || (!nextNextChar.done && breakableBefore[nextCodePoint])) {
 
                 potentialLineBreaks.push(
@@ -655,10 +654,10 @@ function shapeLines(shaping: Shaping,
         }
 
         let i = 0;
-        for (const {segment} of segmenter.segment(line.text)) {
+        for (const grapheme of splitByGraphemeCluster(line.text)) {
             const section = line.getSection(i);
             const sectionIndex = line.getSectionIndex(i);
-            const codePoint = segment.codePointAt(0);
+            const codePoint = grapheme.codePointAt(0);
             let baselineOffset = 0.0;
             let metrics = null;
             let rect = null;
@@ -673,13 +672,13 @@ function shapeLines(shaping: Shaping,
 
             if (!section.imageName) {
                 const positions = glyphPositions[section.fontStack];
-                const glyphPosition = positions && positions[segment];
+                const glyphPosition = positions && positions[grapheme];
                 if (glyphPosition && glyphPosition.rect) {
                     rect = glyphPosition.rect;
                     metrics = glyphPosition.metrics;
                 } else {
                     const glyphs = glyphMap[section.fontStack];
-                    const glyph = glyphs && glyphs[segment];
+                    const glyph = glyphs && glyphs[grapheme];
                     if (!glyph) continue;
                     metrics = glyph.metrics;
                 }
@@ -722,11 +721,11 @@ function shapeLines(shaping: Shaping,
             }
 
             if (!vertical) {
-                positionedGlyphs.push({glyph: segment, imageName, x, y: y + baselineOffset, vertical, scale: section.scale, fontStack: section.fontStack, sectionIndex, metrics, rect});
+                positionedGlyphs.push({glyph: grapheme, imageName, x, y: y + baselineOffset, vertical, scale: section.scale, fontStack: section.fontStack, sectionIndex, metrics, rect});
                 x += metrics.advance * section.scale + spacing;
             } else {
                 shaping.verticalizable = true;
-                positionedGlyphs.push({glyph: segment, imageName, x, y: y + baselineOffset, vertical, scale: section.scale, fontStack: section.fontStack, sectionIndex, metrics, rect});
+                positionedGlyphs.push({glyph: grapheme, imageName, x, y: y + baselineOffset, vertical, scale: section.scale, fontStack: section.fontStack, sectionIndex, metrics, rect});
                 x += verticalAdvance * section.scale + spacing;
             }
 

diff --git a/src/util/script_detection.ts b/src/util/script_detection.ts
@@ -2,6 +2,31 @@
 
 import {unicodeBlockLookup as isChar} from './is_char_in_unicode_block';
 
const segmenter = new Intl.Segmenter();

// Matches a segment whose first code point is a spacing combining mark
// (General_Category=Mc). The pattern has no `g`/`y` flag, so `test()` keeps no
// state between calls and one shared instance is safe.
const startsWithSpacingMark = /^\p{Mc}/u;

/**
 * Splits text into grapheme clusters, folding every cluster that begins with a
 * spacing combining mark into the cluster that precedes it.
 *
 * `Intl.Segmenter` can report such a mark as a cluster of its own; treating it
 * as a standalone glyph would draw the diacritic over a dotted circle, so it is
 * kept together with its base instead.
 *
 * The first cluster is never merged (there is nothing before it), so text that
 * starts with a mark yields that mark as the first element.
 *
 * @param text - The string to split.
 * @returns The clusters in order; joining them reproduces `text`. Empty input
 * yields an empty array.
 */
export function splitByGraphemeCluster(text: string): string[] {
    const clusters: string[] = [];
    for (const {segment} of segmenter.segment(text)) {
        if (clusters.length > 0 && startsWithSpacingMark.test(segment)) {
            // Append to the previous cluster rather than mutating the
            // segmenter's own SegmentData objects.
            clusters[clusters.length - 1] += segment;
        } else {
            clusters.push(segment);
        }
    }
    return clusters;
}
+
 export function allowsIdeographicBreaking(chars: string) {
     for (const char of chars) {
         if (!charAllowsIdeographicBreaking(char.codePointAt(0))) return false;