Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Render complex text, variant forms, emoji, etc. #1

Draft
wants to merge 18 commits into
base: astral-cjk
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions build/generate-unicode-data.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import * as fs from 'fs';

// Base URL from which Unicode Character Database property files are fetched.
// Use https://www.unicode.org/Public/draft/UCD/ucd instead if the next Unicode version is finalized and awaiting publication.
const ucdBaseUrl = 'https://www.unicode.org/Public/UCD/latest/ucd';

/**
 * Downloads a Unicode Character Database property file and collects the code points that are assigned a given property value.
 *
 * @param property - Property name with underscores, e.g. `Indic_Syllabic_Category`. The underscores are removed to form the file name under `ucdBaseUrl`.
 * @param value - Property value to select, e.g. `Invisible_Stacker`. It is interpolated into a regular expression without escaping, so it must be a plain identifier.
 * @returns `version` and `date` as parsed from the first two header lines of the file (`null` if the header is not in the expected format), and `characterClass`, a concatenation of `\u` escapes and `-` ranges meant to be placed inside a `u`-flagged regular expression character class.
 * @throws Error if the file cannot be fetched, or if no record in the file has the requested value.
 */
async function getPropertyData(property: string, value: string): Promise<{[_: string]: string}> {
    const propertyUrl = `${ucdBaseUrl}/${property.replaceAll('_', '')}.txt`;
    const response = await fetch(propertyUrl);
    if (!response.ok) {
        throw new Error(`Unable to fetch latest Unicode character database file for ${property}: ${response.status}`);
    }

    const table = await response.text();
    const header = table.match(/^# \w+-(\d+\.\d+\.\d+)\.txt\n# Date: (\d\d\d\d-\d\d-\d\d)/);
    // Each record starts with a code point or a `first..last` range, followed by `; value`.
    // The range separator is escaped so that it only matches two literal dots, and the negative
    // lookahead keeps `value` from matching as a mere prefix of a longer value name.
    const tableRegExp = new RegExp(`^([0-9A-Z]{4,6}(?:\\.\\.[0-9A-Z]{4,6})?)(?= *; ${value}(?!\\w))`, 'gm');
    const records = table.match(tableRegExp);
    if (!records) {
        // String.prototype.match() returns null rather than an empty array when nothing matches.
        throw new Error(`No code points with ${property}=${value} found in ${propertyUrl}`);
    }

    const characterClass = records
        .map(record => record
            .split('..')
            // Code points beyond the BMP need the braced escape form.
            .map(codePoint => (codePoint.length > 4) ? `\\u{${codePoint}}` : `\\u${codePoint}`)
            .join('-'))
        .join('');
    return {
        version: header && header[1],
        date: header && header[2],
        characterClass,
    };
}

// Look up the code points whose Indic_Syllabic_Category is Invisible_Stacker.
const indicSyllabicCategory = await getPropertyData('Indic_Syllabic_Category', 'Invisible_Stacker');

// Write the generated module. Everything inside the template literal is emitted as file content:
// escaped backslashes and backticks come out as single characters, and the interpolations insert
// the Unicode version, date and character class obtained above.
fs.writeFileSync('src/data/unicode_properties.ts',
`// This file is generated. Edit build/generate-unicode-data.ts, then run \`npm run generate-unicode-data\`.

/**
* Returns whether two grapheme clusters detected by \`Intl.Segmenter\` can be combined to prevent an invisible combining mark from appearing unexpectedly.
*/
export function canCombineGraphemes(former: string, latter: string): boolean {
// Zero-width joiner
// Indic_Syllabic_Category=Invisible_Stacker as of Unicode ${indicSyllabicCategory.version}, published ${indicSyllabicCategory.date}.
// eslint-disable-next-line no-misleading-character-class
const terminalJoinersRegExp = /[\\u200D${indicSyllabicCategory.characterClass}]$/u;
return terminalJoinersRegExp.test(former) || /^\\p{gc=Mc}/u.test(latter);
}
`);
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@
},
"scripts": {
"generate-dist-package": "node --no-warnings --loader ts-node/esm build/generate-dist-package.js",
"generate-unicode-data": "node --no-warnings --loader ts-node/esm build/generate-unicode-data.ts",
"generate-shaders": "node --no-warnings --loader ts-node/esm build/generate-shaders.ts",
"generate-struct-arrays": "node --no-warnings --loader ts-node/esm build/generate-struct-arrays.ts",
"generate-style-code": "node --no-warnings --loader ts-node/esm build/generate-style-code.ts",
Expand Down
12 changes: 6 additions & 6 deletions src/data/bucket/symbol_bucket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import {ProgramConfigurationSet} from '../program_configuration';
import {TriangleIndexArray, LineIndexArray} from '../index_array_type';
import {transformText} from '../../symbol/transform_text';
import {mergeLines} from '../../symbol/merge_lines';
import {allowsVerticalWritingMode, stringContainsRTLText} from '../../util/script_detection';
import {allowsVerticalWritingMode, splitByGraphemeCluster, stringContainsRTLText} from '../../util/script_detection';
import {WritingMode} from '../../symbol/shaping';
import {loadGeometry} from '../load_geometry';
import {toEvaluationFeature} from '../evaluation_feature';
Expand Down Expand Up @@ -419,17 +419,17 @@ export class SymbolBucket implements Bucket {

private calculateGlyphDependencies(
text: string,
stack: {[_: number]: boolean},
stack: {[_: string]: boolean},
textAlongLine: boolean,
allowVerticalPlacement: boolean,
doesAllowVerticalWritingMode: boolean) {

for (const char of text) {
stack[char.codePointAt(0)] = true;
for (const {segment} of splitByGraphemeCluster(text)) {
stack[segment] = true;
if ((textAlongLine || allowVerticalPlacement) && doesAllowVerticalWritingMode) {
const verticalChar = verticalizedCharacterMap[char];
const verticalChar = verticalizedCharacterMap[segment];
if (verticalChar) {
stack[verticalChar.codePointAt(0)] = true;
stack[segment] = true;
}
}
}
Expand Down
12 changes: 12 additions & 0 deletions src/data/unicode_properties.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// This file is generated. Edit build/generate-unicode-data.ts, then run `npm run generate-unicode-data`.

/**
 * Tells whether two adjacent grapheme clusters reported by `Intl.Segmenter` should be merged into one,
 * so that an invisible combining mark does not show up unexpectedly.
 *
 * @param former - The cluster that comes first.
 * @param latter - The cluster that immediately follows.
 * @returns `true` if `former` ends with a joiner or `latter` begins with a spacing combining mark.
 */
export function canCombineGraphemes(former: string, latter: string): boolean {
    // The class holds the zero-width joiner plus every code point with
    // Indic_Syllabic_Category=Invisible_Stacker as of Unicode 16.0.0, published 2024-04-30.
    // eslint-disable-next-line no-misleading-character-class
    const endsWithJoiner = /[\u200D\u1039\u17D2\u1A60\u1BAB\uAAF6\u{10A3F}\u{11133}\u{113D0}\u{1193E}\u{11A47}\u{11A99}\u{11D45}\u{11D97}\u{11F42}]$/u.test(former);
    if (endsWithJoiner) {
        return true;
    }

    // General_Category=Mc (spacing combining mark) at the start of the following cluster.
    const startsWithSpacingMark = /^\p{gc=Mc}/u.test(latter);
    return startsWithSpacingMark;
}
14 changes: 7 additions & 7 deletions src/render/glyph_atlas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ export type GlyphPosition = {
*/
export type GlyphPositions = {
[_: string]: {
[_: number]: GlyphPosition;
[_: string]: GlyphPosition;
};
};

Expand All @@ -46,8 +46,8 @@ export class GlyphAtlas {
const glyphs = stacks[stack];
const stackPositions = positions[stack] = {};

for (const id in glyphs) {
const src = glyphs[+id];
for (const grapheme in glyphs) {
const src = glyphs[grapheme];
if (!src || src.bitmap.width === 0 || src.bitmap.height === 0) continue;

const bin = {
Expand All @@ -57,7 +57,7 @@ export class GlyphAtlas {
h: src.bitmap.height + 2 * padding
};
bins.push(bin);
stackPositions[id] = {rect: bin, metrics: src.metrics};
stackPositions[grapheme] = {rect: bin, metrics: src.metrics};
}
}

Expand All @@ -67,10 +67,10 @@ export class GlyphAtlas {
for (const stack in stacks) {
const glyphs = stacks[stack];

for (const id in glyphs) {
const src = glyphs[+id];
for (const grapheme in glyphs) {
const src = glyphs[grapheme];
if (!src || src.bitmap.width === 0 || src.bitmap.height === 0) continue;
const bin = positions[stack][id].rect;
const bin = positions[stack][grapheme].rect;
AlphaImage.copy(src.bitmap, image, {x: 0, y: 0}, {x: bin.x + padding, y: bin.y + padding}, src.bitmap);
}
}
Expand Down
48 changes: 24 additions & 24 deletions src/render/glyph_manager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import {RequestManager} from '../util/request_manager';
describe('GlyphManager', () => {
const GLYPHS = {};
for (const glyph of parseGlyphPbf(fs.readFileSync('./test/unit/assets/0-255.pbf'))) {
GLYPHS[glyph.id] = glyph;
GLYPHS[glyph.grapheme] = glyph;
}

const identityTransform = ((url) => ({url})) as any as RequestManager;
Expand Down Expand Up @@ -35,22 +35,22 @@ describe('GlyphManager', () => {
createLoadGlyphRangeStub();
const manager = createGlyphManager();

const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': [55]});
expect(returnedGlyphs['Arial Unicode MS']['55'].metrics.advance).toBe(12);
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': ['7']});
expect(returnedGlyphs['Arial Unicode MS']['7'].metrics.advance).toBe(12);
});

test('GlyphManager doesn\'t request twice 0-255 PBF if a glyph is missing', async () => {
const stub = createLoadGlyphRangeStub();
const manager = createGlyphManager();

await manager.getGlyphs({'Arial Unicode MS': [0.5]});
await manager.getGlyphs({'Arial Unicode MS': ['文']});
expect(manager.entries['Arial Unicode MS'].ranges[0]).toBe(true);
expect(stub).toHaveBeenCalledTimes(1);

// We remove all requests as in getGlyphs code.
delete manager.entries['Arial Unicode MS'].requests[0];

await manager.getGlyphs({'Arial Unicode MS': [0.5]});
await manager.getGlyphs({'Arial Unicode MS': ['文']});
expect(manager.entries['Arial Unicode MS'].ranges[0]).toBe(true);
expect(stub).toHaveBeenCalledTimes(1);
});
Expand All @@ -62,8 +62,8 @@ describe('GlyphManager', () => {

const manager = createGlyphManager();

const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': [0x5e73]});
expect(returnedGlyphs['Arial Unicode MS'][0x5e73]).toBeNull(); // The fixture returns a PBF without the glyph we requested
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': ['平']});
expect(returnedGlyphs['Arial Unicode MS']['平']).toBeNull(); // The fixture returns a PBF without the glyph we requested
});

test('GlyphManager requests remote non-BMP, non-CJK PBF', async () => {
Expand All @@ -74,8 +74,8 @@ describe('GlyphManager', () => {
const manager = createGlyphManager();

// Request Egyptian hieroglyph 𓃰
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': [0x1e0f0]});
expect(returnedGlyphs['Arial Unicode MS'][0x1e0f0]).toBeNull(); // The fixture returns a PBF without the glyph we requested
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': ['𓃰']});
expect(returnedGlyphs['Arial Unicode MS']['𓃰']).toBeNull(); // The fixture returns a PBF without the glyph we requested
});

test('GlyphManager does not cache CJK chars that should be rendered locally', async () => {
Expand All @@ -92,11 +92,11 @@ describe('GlyphManager', () => {
const manager = createGlyphManager('sans-serif');

//Request char that overlaps Katakana range
let returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': [0x3005]});
expect(returnedGlyphs['Arial Unicode MS'][0x3005]).not.toBeNull();
let returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': ['々']});
expect(returnedGlyphs['Arial Unicode MS']['々']).not.toBeNull();
//Request char from Katakana range (te テ)
returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': [0x30C6]});
const glyph = returnedGlyphs['Arial Unicode MS'][0x30c6];
returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': ['テ']});
const glyph = returnedGlyphs['Arial Unicode MS']['テ'];
//Ensure that te is locally generated.
expect(glyph.bitmap.height).toBe(12);
expect(glyph.bitmap.width).toBe(12);
Expand All @@ -106,32 +106,32 @@ describe('GlyphManager', () => {
const manager = createGlyphManager('sans-serif');

// Chinese character píng 平
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': [0x5e73]});
expect(returnedGlyphs['Arial Unicode MS'][0x5e73].metrics.advance).toBe(0.5);
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': ['平']});
expect(returnedGlyphs['Arial Unicode MS']['平'].metrics.advance).toBe(0.5);
});

test('GlyphManager generates non-BMP CJK PBF locally', async () => {
const manager = createGlyphManager('sans-serif');

// Chinese character biáng 𰻞
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': [0x30EDE]});
expect(returnedGlyphs['Arial Unicode MS'][0x30EDE].metrics.advance).toBe(1);
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': ['𰻞']});
expect(returnedGlyphs['Arial Unicode MS']['𰻞'].metrics.advance).toBe(1);
});

test('GlyphManager generates Katakana PBF locally', async () => {
const manager = createGlyphManager('sans-serif');

// Katakana letter te テ
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': [0x30c6]});
expect(returnedGlyphs['Arial Unicode MS'][0x30c6].metrics.advance).toBe(0.5);
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': ['テ']});
expect(returnedGlyphs['Arial Unicode MS']['テ'].metrics.advance).toBe(0.5);
});

test('GlyphManager generates Hiragana PBF locally', async () => {
const manager = createGlyphManager('sans-serif');

//Hiragana letter te て
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': [0x3066]});
expect(returnedGlyphs['Arial Unicode MS'][0x3066].metrics.advance).toBe(0.5);
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': ['て']});
expect(returnedGlyphs['Arial Unicode MS']['て'].metrics.advance).toBe(0.5);
});

test('GlyphManager consistently generates CJKV text locally', async () => {
Expand Down Expand Up @@ -159,9 +159,9 @@ describe('GlyphManager', () => {
});

// Katakana letter te
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': [0x30c6]});
expect(returnedGlyphs['Arial Unicode MS'][0x30c6].metrics.advance).toBe(24);
await manager.getGlyphs({'Arial Unicode MS': [0x30c6]});
const returnedGlyphs = await manager.getGlyphs({'Arial Unicode MS': ['テ']});
expect(returnedGlyphs['Arial Unicode MS']['テ'].metrics.advance).toBe(24);
await manager.getGlyphs({'Arial Unicode MS': ['テ']});
expect(drawSpy).toHaveBeenCalledTimes(1);
});
});
Loading