Skip to content

Commit

Permalink
Adding babel/localeData.json and its corresponding phet.chipper.local…
Browse files Browse the repository at this point in the history
…eData, and initial switches to use it in simulations and simulation builds, see phetsims/joist#963
  • Loading branch information
jonathanolson committed May 7, 2024
1 parent 6761447 commit 8e96bb5
Show file tree
Hide file tree
Showing 10 changed files with 309 additions and 52 deletions.
14 changes: 10 additions & 4 deletions js/LocalizedString.ts
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,18 @@ class LocalizedString {
return localeOrder[ index + 1 ];
}
else {
// doesn't exist in those
if ( locale.includes( '_' ) ) {
return locale.slice( 0, 2 ) as Locale; // zh_CN => zh
if ( phet.chipper.localeData[ locale ] ) {
// Pick either the first fallback locale from our localeData, or English
return ( phet.chipper.localeData[ locale ].fallbackLocales || [ 'en' ] )[ 0 ];
}
else {
return 'en';
// doesn't exist in those
if ( locale.includes( '_' ) ) {
return locale.slice( 0, 2 ) as Locale; // zh_CN => zh
}
else {
return 'en';
}
}
}
}
Expand Down
153 changes: 153 additions & 0 deletions js/data/newUpdateLocaleInfo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// Copyright 2024, University of Colorado Boulder

/**
* TODO: Move over to updateLocaleInfo.js once we are ready to propagate the locale changes https://github.com/phetsims/joist/issues/963
*
* WARNING: This will commit/push the changes. Those changes will likely be propagated immediately to the website and rosetta.
*
* NOTE: Run with CWD of chipper/js/data
*
* @author Matt Pennington (PhET Interactive Simulations)
*/

const child_process = require( 'child_process' );
const fs = require( 'fs' );

/**
* Converts locale data from babel/localeData.json into legacy formats used by rosetta and the website.
*
* Overall description of the localeData system:
*
* - babel/localeData.json - Ground truth, includes the "new" format with locale3 and englishName instead of name
* - chipper/js/data/localeInfo.js - CommonJS legacy module
* - chipper/js/data/localeInfoModule.js - ES6 legacy module
* - chipper/js/data/localeInfo.json - JSON legacy
*
* IMPORTANT - MUST READ!!!
* You may modify babel/localeData.json file with new locale information. After modifying the file you must take the following steps:
* 1. Run ./updateLocaleInfo.js, so that the automatically generated files are also updated
* 2. Notify the responsible developers for rosetta, weddell, yotta, and the website that localeInfo was updated.
* 3. TODO figure out next steps, see https://github.com/phetsims/joist/issues/963
*
* Locale data was originally based on Java's Locale object, but has been modified. Essentially each locale has the
* following data:
*
* - locale: Either in the format `xx` or `xx_XX` (ISO-639-1 with 2-letter country code optional). Sometimes these
* do not match with ISO-639-1, we have had to add some for our needs.
* - language codes are ISO 639-1, see http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
* - country codes are ISO 3166-1 alpha2, see http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
*
* NOTE: We are using an older version of ISO 639-1 because java.util.Locale maps some of the newer language codes to
* older codes. See Locale.convertOldISOCodes.
* The affected language codes are:
* he -> iw (Hebrew)
* yi -> ji (Yiddish)
* id -> in (Indonesian)
* - locale3: Format of `xxx`. The ISO-639-2 code for the language (3-letter code), if available. Some locales do not
* have this information (most do).
* - direction: either `ltr` or `rtl` for left-to-right or right-to-left
* - englishName: The name of the locale in English
* - localizedName: The name of the locale in the locale itself
*
* ALSO NOTE: We had a request to support Lakota, which is not included in ISO 639-1, and is only defined as a three-
* letter code in ISO 639-3. The locale combination 'lk' was not taken in ISO 639-1, so we added it. Strictly
* speaking, this is a deviation from the spec.
*
* @author Jonathan Olson <[email protected]>
*/

// Load our ground source of truth. The relative path is resolved against the CWD, so this script has to be run from
// chipper/js/data.
const localeData = JSON.parse( fs.readFileSync( '../../../babel/localeData.json', 'utf8' ) );

// eslint-disable-next-line bad-text
const badText = 'Slave'; // There is an englishName that contains this word, see https://en.wikipedia.org/?title=Slave_language_(Athapascan)&redirect=no

/**
 * Escapes a string so that it can be embedded inside a single-quoted string literal of the generated JS source.
 * Backslashes are handled first, so that the backslashes introduced for the quotes are not escaped a second time.
 *
 * @param {string} string
 * @returns {string}
 */
const escapeForSingleQuotedLiteral = string => string.replace( /\\/g, '\\\\' ).replace( /'/g, '\\\'' );

// Construct the concise JS that defines the legacy locale-info format: one property per locale, where the new
// englishName field is written out under the legacy `name` key.
const localeInfoEntries = Object.keys( localeData ).map( locale => {
  const data = localeData[ locale ];

  // A name containing the flagged word gets its own disable directive in the generated source (presumably because the
  // generated files are linted as well).
  const lintDirective = data.englishName.includes( badText ) ? '// eslint-disable-next-line bad-text\n ' : '';

  return `
${locale}: {
${lintDirective}name: '${escapeForSingleQuotedLiteral( data.englishName )}',
localizedName: '${escapeForSingleQuotedLiteral( data.localizedName )}',
direction: '${data.direction}'
}`;
} );

// Joining the entries (instead of appending a comma after each one and slicing the last character off) keeps the
// snippet a valid object literal even if there are no locales at all.
const localeInfoSnippet = `{${localeInfoEntries.join( ',' )}\n}`;

// Legacy JSON format: a leading _comment entry, followed by one entry per locale in which the new englishName field is
// exposed under the legacy `name` key.
const newLocaleInfo = {
  _comment: 'This file is automatically generated by js/data/updateLocaleInfo.js. Do not modify it directly.'
};
Object.keys( localeData ).forEach( locale => {
  const { englishName, localizedName, direction } = localeData[ locale ];

  newLocaleInfo[ locale ] = {
    name: englishName,
    localizedName: localizedName,
    direction: direction
  };
} );

// Pretty-printed with 2-space indentation
const localeInfoJSON = JSON.stringify( newLocaleInfo, null, 2 );
fs.writeFileSync( '../../data/localeInfo.json', localeInfoJSON );

// Header shared by both generated JS files: copyright line (ending at the current year) plus the generated-file notice
const currentYear = new Date().getFullYear();
const commonDocumentation = `// Copyright 2015-${currentYear}, University of Colorado Boulder
/**
* This file is automatically generated by js/data/updateLocaleInfo.js. Do not modify it directly.
*
* @author automatically generated by updateLocaleInfo.js
*/
/* eslint-env browser, node */
`;

// The CommonJS and ES6 variants only differ in how the object literal is exported
const generatedModules = [
  { file: './localeInfo.js', exportPrefix: 'module.exports = ' },
  { file: './localeInfoModule.js', exportPrefix: 'export default ' }
];
for ( const { file, exportPrefix } of generatedModules ) {
  fs.writeFileSync( file, `${commonDocumentation}${exportPrefix}${localeInfoSnippet};` );
}

console.log( 'locale info files updated' );

throw new Error( 'NO COMMIT YET, safeguard so we do not commit changes to main yet' ); // TODO: remove for https://github.com/phetsims/joist/issues/963

// Everything below is intentionally unreachable until the safeguard above is removed.

// The generated files, as paths relative to the CWD (chipper/js/data). Only these are checked, staged and committed.
// eslint-disable-next-line no-unreachable
const generatedFiles = '../../data/localeInfo.json ./localeInfo.js ./localeInfoModule.js';

let needsCommit = false;
try {

  // 0 exit code if the generated files have no working copy changes from HEAD. The check is limited to the generated
  // files so that unrelated working copy changes do not trigger a pull and a commit attempt with nothing to commit.
  child_process.execSync( `git diff-index --quiet HEAD -- ${generatedFiles}` );
  console.log( 'No locale info changes, no commit needed.' );
}
catch( e ) {

  // execSync throws on a non-zero exit code, which is how diff-index reports differences
  needsCommit = true;
}

if ( needsCommit ) {
  try {

    console.log( 'pulling' );

    // Some devs have rebase set by default, and you cannot rebase-pull with working copy changes.
    child_process.execSync( 'git pull --no-rebase' );

    child_process.execSync( `git add ${generatedFiles}` );

    console.log( 'committing' );
    child_process.execSync( `git commit --no-verify ${generatedFiles} -m "Automatically updated generated localeInfo files"` );
    console.log( 'pushing' );
    child_process.execSync( 'git push' );
  }
  catch( e ) {

    // Best effort: the generated files were already written above, so report the git failure instead of rethrowing
    console.error( 'Unable to update files in git.', e );
  }
}
3 changes: 1 addition & 2 deletions js/getStringModule.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import ObjectLiteralIO from '../../tandem/js/types/ObjectLiteralIO.js';
import LocalizedString, { LocalizedStringStateDelta, StringsStateStateObject } from './LocalizedString.js';
import TReadOnlyProperty from '../../axon/js/TReadOnlyProperty.js';
import { Locale } from '../../joist/js/i18n/localeProperty.js';
import localeInfoModule from '../../chipper/js/data/localeInfoModule.js';
import { PhetioID } from '../../tandem/js/TandemConstants.js';

// constants
Expand Down Expand Up @@ -106,7 +105,7 @@ const getStringModule = ( requirejsNamespace: string ): object => {
// Our locale information is from phet.chipper.locale

assert && assert( typeof phet.chipper.locale === 'string', 'phet.chipper.locale should have been loaded by now' );
assert && assert( Object.keys( localeInfoModule ).includes( phet.chipper.locale ), 'phet.chipper.locale should have been loaded by now' );
assert && assert( Object.keys( phet.chipper.localeData ).includes( phet.chipper.locale ), 'phet.chipper.locale should have been loaded by now' );
assert && assert( phet.chipper.strings, 'phet.chipper.strings should have been loaded by now' );

// Construct locales in increasing specificity, e.g. [ 'en', 'zh', 'zh_CN' ], so we get fallbacks in order
Expand Down
1 change: 1 addition & 0 deletions js/grunt/buildRunnable.js
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ module.exports = async function( repo, minifyOptions, allHTML, brand, localesOpt
const commonInitializationOptions = {
brand: brand,
repo: repo,
allLocales: allLocales,
stringMap: stringMap,
stringMetadata: stringMetadata,
dependencies: dependencies,
Expand Down
44 changes: 38 additions & 6 deletions js/grunt/getInitializationScript.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@


// modules
const _ = require( 'lodash' );
const assert = require( 'assert' );
const ChipperConstants = require( '../common/ChipperConstants' );
const ChipperStringUtils = require( '../common/ChipperStringUtils' );
const fs = require( 'fs' );
const grunt = require( 'grunt' );
const transpile = require( './transpile' );
const stringEncoding = require( '../common/stringEncoding' );
Expand All @@ -29,6 +31,7 @@ module.exports = function( config ) {
const {
brand, // {string}, e.g. 'phet', 'phet-io'
repo, // {string}
allLocales, // {string[]}
stringMap, // {Object}, map[ locale ][ stringKey ] => {string}
stringMetadata, // {Object}, map[ stringKey ] => {Object}
version, // {string}
Expand All @@ -51,17 +54,45 @@ module.exports = function( config ) {
assert( typeof includeAllLocales === 'boolean', 'Requires includeAllLocales' );
assert( typeof isDebugBuild === 'boolean', 'Requires isDebugBuild' );

// Load localeData
const fullLocaleData = JSON.parse( fs.readFileSync( '../babel/localeData.json', 'utf8' ) );

// Include a subset of locales' translated strings
let phetStrings = stringMap;
if ( !includeAllLocales ) {
phetStrings = {};
phetStrings[ ChipperConstants.FALLBACK_LOCALE ] = stringMap[ ChipperConstants.FALLBACK_LOCALE ];
if ( locale !== ChipperConstants.FALLBACK_LOCALE ) {

// Go through all of the potential fallback locales, and include the strings for each of them
const requiredLocales = [
// duplicates OK
locale,
...( fullLocaleData[ locale ].fallbackLocales || [] ),
ChipperConstants.FALLBACK_LOCALE
];

for ( const locale of requiredLocales ) {
phetStrings[ locale ] = stringMap[ locale ];
}
const splitLocale = locale.slice( 0, 2 );
if ( locale.length > 2 && splitLocale !== ChipperConstants.FALLBACK_LOCALE ) {
phetStrings[ splitLocale ] = stringMap[ splitLocale ];
}
}

// Include a (larger) subset of locales' localeData: sorted, de-duplicated locale codes.
const includedDataLocales = _.sortBy( _.uniq( [
  // Always include the fallback (en)
  ChipperConstants.FALLBACK_LOCALE,

  // Include directly-used locales
  ...allLocales,

  // Include locales that will fall back to directly-used locales. This list has to be spread like the one above:
  // as a nested array it would be a single element, whose string form is then used as a key in the loop below, so
  // none of these locales would end up in localeData.
  ...Object.keys( fullLocaleData ).filter( locale => {
    const fallbackLocales = fullLocaleData[ locale ].fallbackLocales;

    return !!fallbackLocales && fallbackLocales.some( fallbackLocale => allLocales.includes( fallbackLocale ) );
  } )
] ) );

// Copy over the full data entry for every included locale
const localeData = {};
for ( const locale of includedDataLocales ) {
  localeData[ locale ] = fullLocaleData[ locale ];
}

return ChipperStringUtils.replacePlaceholders( grunt.file.read( '../chipper/templates/chipper-initialization.js' ), {
Expand All @@ -70,6 +101,7 @@ module.exports = function( config ) {
PHET_BUILD_TIMESTAMP: timestamp,
PHET_BRAND: brand,
PHET_LOCALE: locale,
PHET_LOCALE_DATA: JSON.stringify( localeData ),
PHET_DEPENDENCIES: JSON.stringify( dependencies, null, 2 ),
// If it's a debug build, don't encode the strings, so that they are easier to inspect
PHET_STRINGS: ( isDebugBuild || !encodeStringMap ) ? JSON.stringify( phetStrings, null, isDebugBuild ? 2 : '' ) : stringEncoding.encodeStringMapToJS( phetStrings ),
Expand Down
56 changes: 26 additions & 30 deletions js/grunt/getStringMap.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,27 @@ const pascalCase = require( '../common/pascalCase' );
const ChipperStringUtils = require( '../common/ChipperStringUtils' );
const fs = require( 'fs' );
const grunt = require( 'grunt' );
const localeInfo = require( '../data/localeInfo' ); // Locale information
const path = require( 'path' );

const localeData = JSON.parse( fs.readFileSync( '../babel/localeData.json', 'utf8' ) );

/**
 * Computes the ordered list of locales to try for a given locale: the locale itself, then the fallbackLocales listed
 * for it in localeData (if any), and finally the global fallback locale. The global fallback locale is never put in
 * the leading position, so asking for it directly gives a single-element result, e.g.
 * 'ar_AE' => [ 'ar_AE', 'ar', 'ar_MA', 'en' ] (when localeData lists [ 'ar', 'ar_MA' ] as its fallbackLocales)
 * 'es' => [ 'es', 'en' ]
 * 'en' => [ 'en' ]
 *
 * NOTE: No de-duplication is done here, entries from fallbackLocales are passed through as-is.
 *
 * @param {string} locale - must have an entry in localeData
 * @returns {Array.<string>}
 */
const localeFallbacks = locale => {
  const fallbackLocale = ChipperConstants.FALLBACK_LOCALE; // e.g. 'en'

  // The global fallback always goes last, so it is left out of the leading position
  const leadingLocales = locale === fallbackLocale ? [] : [ locale ];

  // Not every locale declares fallbackLocales
  const declaredFallbacks = localeData[ locale ].fallbackLocales || [];

  return [ ...leadingLocales, ...declaredFallbacks, fallbackLocale ];
};

/**
* Load all the required string files into memory, so we don't load them multiple times (for each usage).
*
Expand Down Expand Up @@ -55,19 +73,14 @@ const getStringFilesContents = ( reposWithUsedStrings, locales ) => {
stringFilesContents[ repo ][ locale ] = fileContents;
};

locales.forEach( locale => {
assert( localeInfo[ locale ], `unsupported locale: ${locale}` );
const isRTL = localeInfo[ locale ].direction === 'rtl';
// Include fallback locales (they may have duplicates)
const includedLocales = _.sortBy( _.uniq( locales.flatMap( locale => {
assert( localeData[ locale ], `unsupported locale: ${locale}` );

// Handle fallback locales
addLocale( locale, isRTL );
if ( locale.length > 2 ) {
const middleLocale = locale.slice( 0, 2 );
if ( !locales.includes( middleLocale ) ) {
addLocale( middleLocale, isRTL );
}
}
} );
return localeFallbacks( locale );
} ) ) );

includedLocales.forEach( locale => addLocale( locale, localeData[ locale ].direction === 'rtl' ) );
} );

return stringFilesContents;
Expand All @@ -85,23 +98,6 @@ module.exports = function( mainRepo, locales, phetLibs, usedModules ) {

assert( locales.indexOf( ChipperConstants.FALLBACK_LOCALE ) !== -1, 'fallback locale is required' );

/**
* For a given locale, return an array of specific locales that we'll use as fallbacks, e.g.
* 'zh_CN' => [ 'zh_CN', 'zh', 'en' ]
* 'es' => [ 'es', 'en' ]
* 'en' => [ 'en' ]
*
* @param {string} locale
* @returns {Array.<string>}
*/
const localeFallbacks = locale => {
return [
...( locale !== ChipperConstants.FALLBACK_LOCALE ? [ locale ] : [] ), // e.g. 'zh_CN'
...( ( locale.length > 2 && locale.slice( 0, 2 ) !== ChipperConstants.FALLBACK_LOCALE ) ? [ locale.slice( 0, 2 ) ] : [] ), // e.g. 'zh'
ChipperConstants.FALLBACK_LOCALE // e.g. 'en'
];
};

// Load the file contents of every single JS module that used any strings
const usedFileContents = usedModules.map( usedModule => fs.readFileSync( `../${usedModule}`, 'utf-8' ) );

Expand Down
Loading

0 comments on commit 8e96bb5

Please sign in to comment.