Skip to content

Commit

Permalink
Fixed Mon–Burmese combining diacritics
Browse files Browse the repository at this point in the history
If a grapheme cluster begins with a combining diacritic, combine it with the preceding grapheme cluster to avoid drawing diacritics over dotted circles.
  • Loading branch information
1ec5 committed Aug 19, 2024
1 parent 85c5cca commit 7d50181
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 30 deletions.
12 changes: 5 additions & 7 deletions src/data/bucket/symbol_bucket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import {ProgramConfigurationSet} from '../program_configuration';
import {TriangleIndexArray, LineIndexArray} from '../index_array_type';
import {transformText} from '../../symbol/transform_text';
import {mergeLines} from '../../symbol/merge_lines';
import {allowsVerticalWritingMode, stringContainsRTLText} from '../../util/script_detection';
import {allowsVerticalWritingMode, splitByGraphemeCluster, stringContainsRTLText} from '../../util/script_detection';
import {WritingMode} from '../../symbol/shaping';
import {loadGeometry} from '../load_geometry';
import {toEvaluationFeature} from '../evaluation_feature';
Expand Down Expand Up @@ -108,8 +108,6 @@ const shaderOpacityAttributes = [
{name: 'a_fade_opacity', components: 1, type: 'Uint8' as ViewType, offset: 0}
];

const segmenter = new Intl.Segmenter();

function addVertex(
array: StructArray,
anchorX: number,
Expand Down Expand Up @@ -426,12 +424,12 @@ export class SymbolBucket implements Bucket {
allowVerticalPlacement: boolean,
doesAllowVerticalWritingMode: boolean) {

for (const {segment} of segmenter.segment(text)) {
stack[segment] = true;
for (const grapheme of splitByGraphemeCluster(text)) {
stack[grapheme] = true;
if ((textAlongLine || allowVerticalPlacement) && doesAllowVerticalWritingMode) {
const verticalChar = verticalizedCharacterMap[segment];
const verticalChar = verticalizedCharacterMap[grapheme];
if (verticalChar) {
stack[segment] = true;
stack[grapheme] = true;
}
}
}
Expand Down
45 changes: 22 additions & 23 deletions src/symbol/shaping.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import {
charHasUprightVerticalOrientation,
charAllowsIdeographicBreaking,
charInComplexShapingScript
charInComplexShapingScript,
splitByGraphemeCluster
} from '../util/script_detection';
import {verticalizePunctuation} from '../util/verticalize_punctuation';
import {rtlWorkerPlugin} from '../source/rtl_text_plugin_worker';
Expand Down Expand Up @@ -75,8 +76,6 @@ export type TextJustify = 'left' | 'center' | 'right';
const PUAbegin = 0xE000;
const PUAend = 0xF8FF;

const segmenter = new Intl.Segmenter();

export class SectionOptions {
// Text options
scale: number;
Expand Down Expand Up @@ -132,7 +131,7 @@ export class TaggedString {
}

length(): number {
return Array.from(segmenter.segment(this.text)).length;
return splitByGraphemeCluster(this.text).length;
}

getSection(index: number): SectionOptions {
Expand All @@ -159,7 +158,7 @@ export class TaggedString {

substring(start: number, end: number): TaggedString {
const substring = new TaggedString();
substring.text = Array.from(segmenter.segment(this.text)).slice(start, end).map(s => s.segment).join('');
substring.text = splitByGraphemeCluster(this.text).slice(start, end).join('');
substring.sectionIndex = this.sectionIndex.slice(start, end);
substring.sections = this.sections;
return substring;
Expand All @@ -169,7 +168,7 @@ export class TaggedString {
* Converts a grapheme cluster index to a UTF-16 code unit (JavaScript character index).
*/
codeUnitIndex(unicodeIndex: number): number {
return Array.from(segmenter.segment(this.text)).slice(0, unicodeIndex).map(s => s.segment).join('').length;
return splitByGraphemeCluster(this.text).slice(0, unicodeIndex).join('').length;
}

toString(): string {
Expand All @@ -185,7 +184,7 @@ export class TaggedString {
this.sections.push(SectionOptions.forText(section.scale, section.fontStack || defaultFontStack));
const index = this.sections.length - 1;
// eslint-disable-next-line @typescript-eslint/no-unused-vars
for (const char of segmenter.segment(section.text)) {
for (const char of splitByGraphemeCluster(section.text)) {
this.sectionIndex.push(index);
}
}
Expand Down Expand Up @@ -280,7 +279,7 @@ function shapeText(
taggedLine.text = line;
taggedLine.sections = logicalInput.sections;
// eslint-disable-next-line @typescript-eslint/no-unused-vars
for (const char of segmenter.segment(line)) {
for (const char of splitByGraphemeCluster(line)) {
taggedLine.sectionIndex.push(0);
}
lines.push(taggedLine);
Expand Down Expand Up @@ -402,9 +401,9 @@ function determineAverageLineWidth(logicalInput: TaggedString,
let totalWidth = 0;

let index = 0;
for (const {segment} of segmenter.segment(logicalInput.text)) {
for (const grapheme of splitByGraphemeCluster(logicalInput.text)) {
const section = logicalInput.getSection(index);
totalWidth += getGlyphAdvance(segment, section, glyphMap, imagePositions, spacing, layoutTextSize);
totalWidth += getGlyphAdvance(grapheme, section, glyphMap, imagePositions, spacing, layoutTextSize);
index++;
}

Expand Down Expand Up @@ -524,28 +523,28 @@ export function determineLineBreaks(
let currentX = 0;

let i = 0;
const chars = segmenter.segment(logicalInput.text)[Symbol.iterator]();
const chars = splitByGraphemeCluster(logicalInput.text)[Symbol.iterator]();
let char = chars.next();
const nextChars = segmenter.segment(logicalInput.text)[Symbol.iterator]();
const nextChars = splitByGraphemeCluster(logicalInput.text)[Symbol.iterator]();
nextChars.next();
let nextChar = nextChars.next();
const nextNextChars = segmenter.segment(logicalInput.text)[Symbol.iterator]();
const nextNextChars = splitByGraphemeCluster(logicalInput.text)[Symbol.iterator]();
nextNextChars.next();
nextNextChars.next();
let nextNextChar = nextNextChars.next();

while (!char.done) {
const section = logicalInput.getSection(i);
const segment = char.value.segment;
const codePoint = segment.codePointAt(0);
const segment = char.value;
const codePoint = segment.codePointAt(0);
if (!whitespace[codePoint]) currentX += getGlyphAdvance(segment, section, glyphMap, imagePositions, spacing, layoutTextSize);

// Ideographic characters, spaces, and word-breaking punctuation that often appear without
// surrounding spaces.
if (!nextChar.done) {
const ideographicBreak = charAllowsIdeographicBreaking(codePoint);
const nextSegment = nextChar.value.segment;
const nextCodePoint = nextSegment.codePointAt(0);
const nextSegment = nextChar.value;
const nextCodePoint = nextSegment.codePointAt(0);
if (breakable[codePoint] || ideographicBreak || section.imageName || (!nextNextChar.done && breakableBefore[nextCodePoint])) {

potentialLineBreaks.push(
Expand Down Expand Up @@ -655,10 +654,10 @@ function shapeLines(shaping: Shaping,
}

let i = 0;
for (const {segment} of segmenter.segment(line.text)) {
for (const grapheme of splitByGraphemeCluster(line.text)) {
const section = line.getSection(i);
const sectionIndex = line.getSectionIndex(i);
const codePoint = segment.codePointAt(0);
const codePoint = grapheme.codePointAt(0);
let baselineOffset = 0.0;
let metrics = null;
let rect = null;
Expand All @@ -673,13 +672,13 @@ function shapeLines(shaping: Shaping,

if (!section.imageName) {
const positions = glyphPositions[section.fontStack];
const glyphPosition = positions && positions[segment];
const glyphPosition = positions && positions[grapheme];
if (glyphPosition && glyphPosition.rect) {
rect = glyphPosition.rect;
metrics = glyphPosition.metrics;
} else {
const glyphs = glyphMap[section.fontStack];
const glyph = glyphs && glyphs[segment];
const glyph = glyphs && glyphs[grapheme];
if (!glyph) continue;
metrics = glyph.metrics;
}
Expand Down Expand Up @@ -722,11 +721,11 @@ function shapeLines(shaping: Shaping,
}

if (!vertical) {
positionedGlyphs.push({glyph: segment, imageName, x, y: y + baselineOffset, vertical, scale: section.scale, fontStack: section.fontStack, sectionIndex, metrics, rect});
positionedGlyphs.push({glyph: grapheme, imageName, x, y: y + baselineOffset, vertical, scale: section.scale, fontStack: section.fontStack, sectionIndex, metrics, rect});
x += metrics.advance * section.scale + spacing;
} else {
shaping.verticalizable = true;
positionedGlyphs.push({glyph: segment, imageName, x, y: y + baselineOffset, vertical, scale: section.scale, fontStack: section.fontStack, sectionIndex, metrics, rect});
positionedGlyphs.push({glyph: grapheme, imageName, x, y: y + baselineOffset, vertical, scale: section.scale, fontStack: section.fontStack, sectionIndex, metrics, rect});
x += verticalAdvance * section.scale + spacing;
}

Expand Down
25 changes: 25 additions & 0 deletions src/util/script_detection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,31 @@

import {unicodeBlockLookup as isChar} from './is_char_in_unicode_block';

const segmenter = new Intl.Segmenter();

// Matches a segment whose first code point is a spacing combining mark
// (Unicode General_Category Mc). Hoisted so it is compiled once, not per segment.
const startsWithSpacingMark = /^\p{Mc}/u;

/**
 * Splits text into grapheme clusters, folding any cluster that begins with a
 * spacing combining mark (General_Category Mc) into the cluster before it.
 *
 * `Intl.Segmenter` can emit a segment that starts with such a mark; appending
 * it to the preceding cluster keeps the mark attached to its base. Consecutive
 * mark-initial segments are all appended to the same base. A mark-initial
 * segment at the very start of the text has no base to attach to and is
 * returned as its own cluster.
 *
 * @param text - The string to split.
 * @returns The clusters in source order; joining them reproduces `text`.
 */
export function splitByGraphemeCluster(text: string): string[] {
    const clusters: string[] = [];
    for (const {segment} of segmenter.segment(text)) {
        if (clusters.length > 0 && startsWithSpacingMark.test(segment)) {
            // Extend the previous cluster instead of mutating segmenter output.
            clusters[clusters.length - 1] += segment;
        } else {
            clusters.push(segment);
        }
    }
    return clusters;
}

export function allowsIdeographicBreaking(chars: string) {
for (const char of chars) {
if (!charAllowsIdeographicBreaking(char.codePointAt(0))) return false;
Expand Down

0 comments on commit 7d50181

Please sign in to comment.