diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 14ed3163248..c9e61af4087 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -187,7 +187,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: run CLDR console check - run: java -DCLDR_GITHUB_ANNOTATIONS=true -DCLDR_DIR=$(pwd) -Xmx6g -jar tools/cldr-code/target/cldr-code.jar check -S common,seed -e -z FINAL_TESTING + run: java -DCLDR_GITHUB_ANNOTATIONS=true -DCLDR_DIR=$(pwd) -Xmx6g -jar tools/cldr-code/target/cldr-code.jar check -S common,seed -e -z BUILD deploy: # don't run deploy on manual builds! if: github.repository == 'unicode-org/cldr' && github.event_name == 'push' && github.ref == 'refs/heads/main' && github.event.inputs.git-ref == '' diff --git a/common/supplemental/units.xml b/common/supplemental/units.xml index 33d6d68d466..3324cc5821f 100644 --- a/common/supplemental/units.xml +++ b/common/supplemental/units.xml @@ -289,7 +289,7 @@ For terms of use, see http://www.unicode.org/copyright.html - + diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/GrammarInfo.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/GrammarInfo.java index bf64edd6b58..2f6c9e7b61b 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/GrammarInfo.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/GrammarInfo.java @@ -745,21 +745,19 @@ public static Set getGrammarLocales() { "month", "year"); + // To see a list of the results for double-checking, run TestUnits with TestUnitsToTranslate -v static final Set EXCLUDE_GRAMMAR = Set.of( - "point", - "dunam", - "dot", - "astronomical-unit", - "nautical-mile", - "knot", - "dalton", + "dot", // fallback is pixel + "dot-per-centimeter", // fallback is pixel + "dunam", // language-specific + "astronomical-unit", // specialized + "nautical-mile", // US/UK specific + "knot", // US/UK specific + "dalton", // specialized + "electronvolt", // specialized "kilocalorie", - "electronvolt", - 
// The following may be reinstated after 45. - "dot-per-centimeter", - "millimeter-ofhg", - "milligram-ofglucose-per-deciliter"); + "point"); public static Set getSpecialsToTranslate() { return INCLUDE_OTHER; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java index 3f29adae2b1..ec17dc6aa5f 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java @@ -13,7 +13,9 @@ import com.google.common.collect.Multimap; import com.google.common.collect.Sets; import com.google.common.collect.TreeMultimap; +import com.ibm.icu.impl.Row; import com.ibm.icu.impl.Row.R2; +import com.ibm.icu.impl.Row.R4; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.number.UnlocalizedNumberFormatter; import com.ibm.icu.text.PluralRules; @@ -29,6 +31,7 @@ import java.util.Collections; import java.util.Comparator; import java.util.EnumSet; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -48,7 +51,9 @@ import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; import org.unicode.cldr.util.Rational.FormatStyle; import org.unicode.cldr.util.Rational.RationalParser; +import org.unicode.cldr.util.StandardCodes.LstrType; import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; +import org.unicode.cldr.util.Validity.Status; public class UnitConverter implements Freezable { public static boolean DEBUG = false; @@ -80,7 +85,7 @@ public class UnitConverter implements Freezable { private Multimap sourceToSystems = TreeMultimap.create(); private Set baseUnits; private Multimap continuations = TreeMultimap.create(); - private Comparator quantityComparator; + private MapComparator quantityComparator; private Map fixDenormalized; private ImmutableMap idToUnitId; @@ -92,6 +97,17 @@ public class UnitConverter implements 
Freezable { public TargetInfoComparator targetInfoComparator; + private final MapComparator LongUnitIdOrder = new MapComparator<>(); + private final MapComparator ShortUnitIdOrder = new MapComparator<>(); + + public Comparator getLongUnitIdComparator() { + return LongUnitIdOrder; + } + + public Comparator getShortUnitIdComparator() { + return ShortUnitIdOrder; + } + /** Warning: ordering is important; determines the normalized output */ public static final Set BASE_UNITS = ImmutableSet.of( @@ -198,6 +214,74 @@ public UnitConverter freeze() { } } idToUnitId = ImmutableMap.copyOf(_idToUnitId); + + // build the map comparators + + Set> all = new TreeSet<>(); + Set baseSeen = new HashSet<>(); + for (String longUnit : + Validity.getInstance().getStatusToCodes(LstrType.unit).get(Status.regular)) { + Output base = new Output<>(); + String shortUnit = getShortId(longUnit); + ConversionInfo conversionInfo = parseUnitId(shortUnit, base, false); + if (conversionInfo == null) { + if (longUnit.equals("temperature-generic")) { + conversionInfo = parseUnitId("kelvin", base, false); + } + } + String quantity = getQuantityFromUnit(base.value, false); + Integer quantityNumericOrder = quantityComparator.getNumericOrder(quantity); + if (quantityNumericOrder == null) { // try the inverse + if (base.value.equals("meter-per-cubic-meter")) { // HACK + quantityNumericOrder = quantityComparator.getNumericOrder("consumption"); + } + if (quantityNumericOrder == null) { + throw new IllegalArgumentException( + "Missing quantity for: " + base.value + ", " + shortUnit); + } + } + + final EnumSet systems = EnumSet.copyOf(getSystemsEnum(shortUnit)); + + // to sort the right items together, put together a sort key + UnitSystem sortingSystem = systems.iterator().next(); + switch (sortingSystem) { + case metric: + case si: + case si_acceptable: + case astronomical: + case metric_adjacent: + case person_age: + sortingSystem = UnitSystem.metric; + break; + // country specific + case 
other: + case ussystem: + case uksystem: + case jpsystem: + sortingSystem = UnitSystem.other; + break; + default: + throw new IllegalArgumentException( + "Add new unitSystem to a grouping: " + sortingSystem); + } + R4 sortKey = + Row.of( + quantityNumericOrder, + sortingSystem, + conversionInfo.factor, + shortUnit); + all.add(sortKey); + } + LongUnitIdOrder.setErrorOnMissing(true); + ShortUnitIdOrder.setErrorOnMissing(true); + for (R4 item : all) { + String shortId = item.get3(); + ShortUnitIdOrder.add(shortId); + LongUnitIdOrder.add(getLongId(shortId)); + } + LongUnitIdOrder.freeze(); + ShortUnitIdOrder.freeze(); } return this; } @@ -649,7 +733,7 @@ private void addToSourceToTarget( } } - private Comparator getQuantityComparator( + private MapComparator getQuantityComparator( Map baseUnitToQuantity2, Map baseUnitToStatus2) { // We want to sort all the quantities so that we have a natural ordering within compound // units. So kilowatt-hour, not hour-kilowatt. diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestUnits.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestUnits.java index f93b83d70bb..0ff27a4964d 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestUnits.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestUnits.java @@ -3544,7 +3544,22 @@ enum TranslationStatus { has_grammar_X, add_grammar, skip_grammar, - skip_trans + skip_trans("\t— specific langs poss.)"); + + private TranslationStatus() { + outName = name(); + } + + private final String outName; + + private TranslationStatus(String extra) { + outName = name() + extra; + } + + @Override + public String toString() { + return outName; + } } /** @@ -3555,7 +3570,8 @@ public void TestUnitsToTranslate() { Set toTranslate = GrammarInfo.getUnitsToAddGrammar(); final CLDRConfig config = CLDRConfig.getInstance(); final UnitConverter converter = config.getSupplementalDataInfo().getUnitConverter(); - Map shortUnitToTranslationStatus40 = 
new TreeMap<>(); + Map shortUnitToTranslationStatus40 = + new TreeMap<>(converter.getShortUnitIdComparator()); for (String longUnit : Validity.getInstance().getStatusToCodes(LstrType.unit).get(Status.regular)) { String shortUnit = converter.getShortId(longUnit); @@ -3588,9 +3604,9 @@ public void TestUnitsToTranslate() { TranslationStatus status40 = entry.getValue(); if (isVerbose()) System.out.println( - shortUnit + converter.getQuantityFromUnit(shortUnit, false) + "\t" - + converter.getQuantityFromUnit(shortUnit, false) + + shortUnit + "\t" + converter.getSystemsEnum(shortUnit) + "\t"