Skip to content

Commit

Permalink
QLocale: fix likely subtags to include und -> en_Latn_US
Browse files Browse the repository at this point in the history
The absence of this rule was masked by other rules (redundant with it)
up to and including CLDR v45, but v46 prunes those redundant rules,
exposing the gap. So include the missing rule and tweak the code that
assumed likely sub-tag rules preserve the language, since this one
doesn't. Rework the tail of withLikelySubtagsAdded() to use this rule
correctly, now that we have it. (The prior comment about there being
no match-all rule was wrong: CLDR did have one, but our data skipped
it.) Amend one test affected by the change (when the system locale
wasn't en_US).

When cherry-picking to 6.8, uiLanguages() needed some coaxing to avoid
duplicate C locale entries in tests of a qualified C locale.

Task-number: QTBUG-130877
Change-Id: I2a415b67af4bc8aa6a766bcc1e349ee5bda9f174
Reviewed-by: Mate Barany <[email protected]>
(cherry picked from commit 3038631)
  • Loading branch information
ediosyncratic committed Dec 10, 2024
1 parent 92e454c commit 1518974
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 19 deletions.
31 changes: 18 additions & 13 deletions src/corelib/text/qlocale.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -374,15 +374,17 @@ QLocaleId QLocaleId::withLikelySubtagsAdded() const noexcept
return value;
}
}
if (matchesAll()) { // Skipped all of the above.
// CLDR has no match-all at v37, but might get one some day ...
pairs = std::lower_bound(pairs, afterPairs, sought);
if (pairs < afterPairs) {
// All other keys are < match-all.
Q_ASSERT(pairs + 1 == afterPairs);
Q_ASSERT(pairs->key.matchesAll());
return pairs->value;
}
// Finally, fall back to the match-all rule (if there is one):
pairs = afterPairs - 1; // All other keys are < match-all.
if (pairs->key.matchesAll()) {
QLocaleId value = pairs->value;
if (language_id)
value.language_id = language_id;
if (territory_id)
value.territory_id = territory_id;
if (script_id)
value.script_id = script_id;
return value;
}
return *this;
}
Expand Down Expand Up @@ -4899,6 +4901,13 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
}
for (qsizetype i = localeIds.size(); i-- > 0; ) {
QLocaleId id = localeIds.at(i);
if (id.language_id == C) {
// Attempt no likely sub-tag amendments to C:
const QString name = QString::fromLatin1(id.name(sep));
if (!uiLanguages.contains(name))
uiLanguages.append(name);
continue;
}
qsizetype j;
QByteArray prior;
if (isSystem && i < uiLanguages.size()) {
Expand All @@ -4907,10 +4916,6 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const
prior = uiLanguages.at(i).toLatin1();
// Insert just after the entry we're supplementing:
j = i + 1;
} else if (id.language_id == C) {
// Attempt no likely sub-tag amendments to C:
uiLanguages.append(QString::fromLatin1(id.name(sep)));
continue;
} else {
// Plain locale or empty system uiLanguages; just append.
prior = id.name(sep);
Expand Down
5 changes: 3 additions & 2 deletions src/corelib/text/qlocale_data_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ struct LanguageCodeEntry {
// GENERATED PART STARTS HERE

/*
This part of the file was generated on 2024-05-31 from the
This part of the file was generated on 2024-12-02 from the
Common Locale Data Repository v45

http://www.unicode.org/cldr/
Expand Down Expand Up @@ -1051,7 +1051,8 @@ static inline constexpr QLocaleId likely_subtags[] = {
{ 0, 138, 0 }, { 302, 138, 227 }, // und_Ugar -> uga_Ugar_SY
{ 0, 139, 0 }, { 308, 139, 134 }, // und_Vaii -> vai_Vaii_LR
{ 0, 141, 0 }, { 255, 141, 50 }, // und_Yiii -> ii_Yiii_CN
{ 0, 142, 0 }, { 339, 142, 161 } // und_Rohg -> rhg_Rohg_MM
{ 0, 142, 0 }, { 339, 142, 161 }, // und_Rohg -> rhg_Rohg_MM
{ 0, 0, 0 }, { 75, 66, 248 } // und -> en_Latn_US
};

static inline constexpr quint16 locale_index[] = {
Expand Down
2 changes: 1 addition & 1 deletion tests/auto/corelib/text/qlocale/tst_qlocale.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ void tst_QLocale::defaulted_ctor()
QCOMPARE(l.territory(), exp_country); \
} while (false)

TEST_CTOR(AnyLanguage, AnyTerritory, default_lang, default_country);
TEST_CTOR(AnyLanguage, AnyTerritory, QLocale::English, QLocale::UnitedStates);
TEST_CTOR(C, AnyTerritory, QLocale::C, QLocale::AnyTerritory);
TEST_CTOR(Aymara, AnyTerritory, default_lang, default_country);
TEST_CTOR(Aymara, France, default_lang, default_country);
Expand Down
2 changes: 0 additions & 2 deletions util/locale_database/cldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ def likelySubTags(self) -> Iterator[tuple[tuple[str, str, str, str],
else:
self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n')
continue
if all(code.startswith('Any') and code[3].isupper() for code in have[:-1]):
continue

give = (give[0],
# Substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
Expand Down
4 changes: 3 additions & 1 deletion util/locale_database/qlocalexml.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,9 @@ def defaultMap(self) -> Iterator[tuple[tuple[int, int], int]]:
sub-tags mapping says language's default locale uses the given
script and territory."""
for have, give in self.__likely:
if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory':
if (have[0] != 'AnyLanguage'
and have[1:] == ('AnyScript', 'AnyTerritory')
and give[2] != 'AnyTerritory'):
assert have[0] == give[0], (have, give)
yield ((self.__langByName[give[0]][0],
self.__textByName[give[1]][0]),
Expand Down

0 comments on commit 1518974

Please sign in to comment.