From 8fac1d910f3b45fb4ee0479a0d565b564a08b35b Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Fri, 18 May 2018 11:36:50 -0700 Subject: [PATCH] deps: cherry-pick 6989b3f6d7 from V8 upstream Original commit message: Fix default Intl language tag handling With certain ICU data bundles (such as the Node.js "small-icu"), %GetDefaultICULocale() may return a more specific language tag (e.g. "en-US") than what's available (e.g. "en"). In those cases, consider the more specific language tag supported. This CL also resolves the following Node.js issue: https://github.com/nodejs/node/issues/15223 Bug: v8:7024 Cq-Include-Trybots: luci.v8.try:v8_linux_noi18n_rel_ng Change-Id: Ifda0776b3418734d5caa8af4e50c17cda95add73 Reviewed-on: https://chromium-review.googlesource.com/668350 Commit-Queue: Daniel Ehrenberg Reviewed-by: Daniel Ehrenberg Cr-Commit-Position: refs/heads/master@{#52716} PR-URL: https://github.com/nodejs/node/pull/20826 Fixes: https://github.com/nodejs/node/issues/15223 Refs: https://github.com/v8/v8/commit/6989b3f6d7d6a5e68127332296723317917821eb Reviewed-By: James M Snell Reviewed-By: Anatoli Papirovski Reviewed-By: Ben Noordhuis --- common.gypi | 2 +- deps/v8/src/js/intl.js | 89 ++++++++++++------- deps/v8/test/intl/assert.js | 41 +++++++++ .../intl/break-iterator/default-locale.js | 4 +- .../wellformed-unsupported-locale.js | 2 +- deps/v8/test/intl/collator/default-locale.js | 11 ++- .../collator/wellformed-unsupported-locale.js | 2 +- .../test/intl/date-format/default-locale.js | 4 +- .../wellformed-unsupported-locale.js | 2 +- .../test/intl/number-format/default-locale.js | 4 +- .../wellformed-unsupported-locale.js | 2 +- deps/v8/test/mjsunit/regress/regress-6288.js | 2 +- 12 files changed, 119 insertions(+), 46 deletions(-) diff --git a/common.gypi b/common.gypi index d7a730906b537b..dd11d7af3c6350 100644 --- a/common.gypi +++ b/common.gypi @@ -27,7 +27,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. - 'v8_embedder_string': '-node.8', + 'v8_embedder_string': '-node.9', # Enable disassembler for `--print-code` v8 options 'v8_enable_disassembler': 1, diff --git a/deps/v8/src/js/intl.js b/deps/v8/src/js/intl.js index 53fbe1f9473267..3c7112716c30cc 100644 --- a/deps/v8/src/js/intl.js +++ b/deps/v8/src/js/intl.js @@ -152,18 +152,11 @@ var AVAILABLE_LOCALES = { */ var DEFAULT_ICU_LOCALE = UNDEFINED; -function GetDefaultICULocaleJS(service) { +function GetDefaultICULocaleJS() { if (IS_UNDEFINED(DEFAULT_ICU_LOCALE)) { DEFAULT_ICU_LOCALE = %GetDefaultICULocale(); } - // Check that this is a valid default for this service, - // otherwise fall back to "und" - // TODO(littledan,jshin): AvailableLocalesOf sometimes excludes locales - // which don't require tailoring, but work fine with root data. Look into - // exposing this fact in ICU or the way Chrome bundles data. - return (IS_UNDEFINED(service) || - HAS_OWN_PROPERTY(getAvailableLocalesOf(service), DEFAULT_ICU_LOCALE)) - ? DEFAULT_ICU_LOCALE : "und"; + return DEFAULT_ICU_LOCALE; } /** @@ -434,6 +427,48 @@ function resolveLocale(service, requestedLocales, options) { } +/** + * Look up the longest non-empty prefix of |locale| that is an element of + * |availableLocales|. Returns undefined when the |locale| is completely + * unsupported by |availableLocales|. + */ +function bestAvailableLocale(availableLocales, locale) { + do { + if (!IS_UNDEFINED(availableLocales[locale])) { + return locale; + } + // Truncate locale if possible. + var pos = %StringLastIndexOf(locale, '-'); + if (pos === -1) { + break; + } + locale = %_Call(StringSubstring, locale, 0, pos); + } while (true); + + return UNDEFINED; +} + + +/** + * Try to match any mutation of |requestedLocale| against |availableLocales|. + */ +function attemptSingleLookup(availableLocales, requestedLocale) { + // Remove all extensions. + var noExtensionsLocale = %RegExpInternalReplace( + GetAnyExtensionRE(), requestedLocale, ''); + var availableLocale = bestAvailableLocale( + availableLocales, requestedLocale); + if (!IS_UNDEFINED(availableLocale)) { + // Return the resolved locale and extension. + var extensionMatch = %regexp_internal_match( + GetUnicodeExtensionRE(), requestedLocale); + var extension = IS_NULL(extensionMatch) ? '' : extensionMatch[0]; + return {locale: availableLocale, extension: extension}; + } + return UNDEFINED; +} + + /** * Returns best matched supported locale and extension info using basic * lookup algorithm. @@ -446,31 +481,25 @@ function lookupMatcher(service, requestedLocales) { var availableLocales = getAvailableLocalesOf(service); for (var i = 0; i < requestedLocales.length; ++i) { - // Remove all extensions. - var locale = %RegExpInternalReplace( - GetAnyExtensionRE(), requestedLocales[i], ''); - do { - if (!IS_UNDEFINED(availableLocales[locale])) { - // Return the resolved locale and extension. - var extensionMatch = %regexp_internal_match( - GetUnicodeExtensionRE(), requestedLocales[i]); - var extension = IS_NULL(extensionMatch) ? '' : extensionMatch[0]; - return {locale: locale, extension: extension, position: i}; - } - // Truncate locale if possible. - var pos = %StringLastIndexOf(locale, '-'); - if (pos === -1) { - break; - } - locale = %_Call(StringSubstring, locale, 0, pos); - } while (true); + var result = attemptSingleLookup(availableLocales, requestedLocales[i]); + if (!IS_UNDEFINED(result)) { + return result; + } + } + + var defLocale = GetDefaultICULocaleJS(); + + // While ECMA-402 returns defLocale directly, we have to check if it is + // supported, as such support is not guaranteed. + var result = attemptSingleLookup(availableLocales, defLocale); + if (!IS_UNDEFINED(result)) { + return result; } // Didn't find a match, return default. return { - locale: GetDefaultICULocaleJS(service), - extension: '', - position: -1 + locale: 'und', + extension: '' }; } diff --git a/deps/v8/test/intl/assert.js b/deps/v8/test/intl/assert.js index d8cc85849f463a..c11e7c0bbf8914 100644 --- a/deps/v8/test/intl/assert.js +++ b/deps/v8/test/intl/assert.js @@ -132,6 +132,16 @@ function assertFalse(value, user_message = '') { } +/** + * Throws if value is null. + */ +function assertNotNull(value, user_message = '') { + if (value === null) { + fail("not null", value, user_message); + } +} + + /** * Runs code() and asserts that it throws the specified exception. */ @@ -189,3 +199,34 @@ function assertInstanceof(obj, type) { (actualTypeName ? ' but of < ' + actualTypeName + '>' : '')); } } + + +/** + * Split a BCP 47 language tag into locale and extension. + */ +function splitLanguageTag(tag) { + var extRe = /(-[0-9A-Za-z](-[0-9A-Za-z]{2,8})+)+$/; + var match = %regexp_internal_match(extRe, tag); + if (match) { + return { locale: tag.slice(0, match.index), extension: match[0] }; + } + + return { locale: tag, extension: '' }; +} + + +/** + * Throw if |parent| is not a more general language tag of |child|, nor |child| + * itself, per BCP 47 rules. + */ +function assertLanguageTag(child, parent) { + var childSplit = splitLanguageTag(child); + var parentSplit = splitLanguageTag(parent); + + // Do not compare extensions at this moment, as %GetDefaultICULocale() + // doesn't always output something we support. + if (childSplit.locale !== parentSplit.locale && + !childSplit.locale.startsWith(parentSplit.locale + '-')) { + fail(child, parent, 'language tag comparison'); + } +} diff --git a/deps/v8/test/intl/break-iterator/default-locale.js b/deps/v8/test/intl/break-iterator/default-locale.js index d8d5aeadb27510..e1a42a100a6b0f 100644 --- a/deps/v8/test/intl/break-iterator/default-locale.js +++ b/deps/v8/test/intl/break-iterator/default-locale.js @@ -37,8 +37,8 @@ assertFalse(options.locale === 'und'); assertFalse(options.locale === ''); assertFalse(options.locale === undefined); -// Then check for equality. -assertEquals(options.locale, %GetDefaultICULocale()); +// Then check for legitimacy. +assertLanguageTag(%GetDefaultICULocale(), options.locale); var iteratorNone = new Intl.v8BreakIterator(); assertEquals(options.locale, iteratorNone.resolvedOptions().locale); diff --git a/deps/v8/test/intl/break-iterator/wellformed-unsupported-locale.js b/deps/v8/test/intl/break-iterator/wellformed-unsupported-locale.js index 5ac8fbcd41583f..ffa44aef081eaf 100644 --- a/deps/v8/test/intl/break-iterator/wellformed-unsupported-locale.js +++ b/deps/v8/test/intl/break-iterator/wellformed-unsupported-locale.js @@ -29,4 +29,4 @@ var iterator = Intl.v8BreakIterator(['xx']); -assertEquals(iterator.resolvedOptions().locale, %GetDefaultICULocale()); +assertLanguageTag(%GetDefaultICULocale(), iterator.resolvedOptions().locale); diff --git a/deps/v8/test/intl/collator/default-locale.js b/deps/v8/test/intl/collator/default-locale.js index db9b1e73304114..5fc6ff4665c903 100644 --- a/deps/v8/test/intl/collator/default-locale.js +++ b/deps/v8/test/intl/collator/default-locale.js @@ -37,8 +37,8 @@ assertFalse(options.locale === 'und'); assertFalse(options.locale === ''); assertFalse(options.locale === undefined); -// Then check for equality. -assertEquals(options.locale, %GetDefaultICULocale()); +// Then check for legitimacy. +assertLanguageTag(%GetDefaultICULocale(), options.locale); var collatorNone = new Intl.Collator(); assertEquals(options.locale, collatorNone.resolvedOptions().locale); @@ -48,5 +48,8 @@ var collatorBraket = new Intl.Collator({}); assertEquals(options.locale, collatorBraket.resolvedOptions().locale); var collatorWithOptions = new Intl.Collator(undefined, {usage: 'search'}); -assertEquals(%GetDefaultICULocale() + '-u-co-search', - collatorWithOptions.resolvedOptions().locale); +assertLanguageTag(%GetDefaultICULocale(), + collatorWithOptions.resolvedOptions().locale); +assertNotNull( + %regexp_internal_match(/-u(-[a-zA-Z]+-[a-zA-Z]+)*-co-search/, + collatorWithOptions.resolvedOptions().locale)); diff --git a/deps/v8/test/intl/collator/wellformed-unsupported-locale.js b/deps/v8/test/intl/collator/wellformed-unsupported-locale.js index 3963d47a61ff85..ad89e3e2203e31 100644 --- a/deps/v8/test/intl/collator/wellformed-unsupported-locale.js +++ b/deps/v8/test/intl/collator/wellformed-unsupported-locale.js @@ -29,4 +29,4 @@ var collator = Intl.Collator(['xx']); -assertEquals(collator.resolvedOptions().locale, %GetDefaultICULocale()); +assertLanguageTag(%GetDefaultICULocale(), collator.resolvedOptions().locale); diff --git a/deps/v8/test/intl/date-format/default-locale.js b/deps/v8/test/intl/date-format/default-locale.js index 8e9b7fcec3e16e..2d79e895b54c38 100644 --- a/deps/v8/test/intl/date-format/default-locale.js +++ b/deps/v8/test/intl/date-format/default-locale.js @@ -37,8 +37,8 @@ assertFalse(options.locale === 'und'); assertFalse(options.locale === ''); assertFalse(options.locale === undefined); -// Then check for equality. -assertEquals(options.locale, %GetDefaultICULocale()); +// Then check for legitimacy. +assertLanguageTag(%GetDefaultICULocale(), options.locale); var dtfNone = new Intl.DateTimeFormat(); assertEquals(options.locale, dtfNone.resolvedOptions().locale); diff --git a/deps/v8/test/intl/date-format/wellformed-unsupported-locale.js b/deps/v8/test/intl/date-format/wellformed-unsupported-locale.js index 6f063abbd1a07c..b81216483250af 100644 --- a/deps/v8/test/intl/date-format/wellformed-unsupported-locale.js +++ b/deps/v8/test/intl/date-format/wellformed-unsupported-locale.js @@ -29,4 +29,4 @@ var dtf = Intl.DateTimeFormat(['xx']); -assertEquals(dtf.resolvedOptions().locale, %GetDefaultICULocale()); +assertLanguageTag(%GetDefaultICULocale(), dtf.resolvedOptions().locale); diff --git a/deps/v8/test/intl/number-format/default-locale.js b/deps/v8/test/intl/number-format/default-locale.js index cd67ba724f140f..a24aec23332786 100644 --- a/deps/v8/test/intl/number-format/default-locale.js +++ b/deps/v8/test/intl/number-format/default-locale.js @@ -37,8 +37,8 @@ assertFalse(options.locale === 'und'); assertFalse(options.locale === ''); assertFalse(options.locale === undefined); -// Then check for equality. -assertEquals(options.locale, %GetDefaultICULocale()); +// Then check for legitimacy. +assertLanguageTag(%GetDefaultICULocale(), options.locale); var nfNone = new Intl.NumberFormat(); assertEquals(options.locale, nfNone.resolvedOptions().locale); diff --git a/deps/v8/test/intl/number-format/wellformed-unsupported-locale.js b/deps/v8/test/intl/number-format/wellformed-unsupported-locale.js index 195eba4c19e09b..c51753928eeb87 100644 --- a/deps/v8/test/intl/number-format/wellformed-unsupported-locale.js +++ b/deps/v8/test/intl/number-format/wellformed-unsupported-locale.js @@ -29,4 +29,4 @@ var nf = Intl.NumberFormat(['xx']); -assertEquals(nf.resolvedOptions().locale, %GetDefaultICULocale()); +assertLanguageTag(%GetDefaultICULocale(), nf.resolvedOptions().locale); diff --git a/deps/v8/test/mjsunit/regress/regress-6288.js b/deps/v8/test/mjsunit/regress/regress-6288.js index 337af54c1a8303..5f550c31c8b72d 100644 --- a/deps/v8/test/mjsunit/regress/regress-6288.js +++ b/deps/v8/test/mjsunit/regress/regress-6288.js @@ -8,6 +8,6 @@ // DateTimeFormat but not Collation if (this.Intl) { - assertEquals('und', Intl.Collator().resolvedOptions().locale); + assertEquals('pt', Intl.Collator().resolvedOptions().locale); assertEquals('pt-BR', Intl.DateTimeFormat().resolvedOptions().locale); }