Skip to content

Commit

Permalink
CLDR-17459 Add units for grammar (#3682)
Browse files Browse the repository at this point in the history
- BRS: move to -z BUILD

Co-authored-by: Steven R. Loomis <[email protected]>
  • Loading branch information
macchiati and srl295 authored May 3, 2024
1 parent 999aa06 commit fb1691a
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 20 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/maven.yml
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: run CLDR console check
run: java -DCLDR_GITHUB_ANNOTATIONS=true -DCLDR_DIR=$(pwd) -Xmx6g -jar tools/cldr-code/target/cldr-code.jar check -S common,seed -e -z FINAL_TESTING
run: java -DCLDR_GITHUB_ANNOTATIONS=true -DCLDR_DIR=$(pwd) -Xmx6g -jar tools/cldr-code/target/cldr-code.jar check -S common,seed -e -z BUILD
deploy:
# don't run deploy on manual builds!
if: github.repository == 'unicode-org/cldr' && github.event_name == 'push' && github.ref == 'refs/heads/main' && github.event.inputs.git-ref == ''
Expand Down
2 changes: 1 addition & 1 deletion common/supplemental/units.xml
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ For terms of use, see http://www.unicode.org/copyright.html
<convertUnit source='pascal' baseUnit='kilogram-per-meter-square-second' systems="si metric prefixable"/>
<convertUnit source='bar' baseUnit='kilogram-per-meter-square-second' factor='100000' systems="si_acceptable metric prefixable"/>
<convertUnit source='atmosphere' baseUnit='kilogram-per-meter-square-second' factor='101325' systems="ussystem uksystem "/>
<convertUnit source='gasoline-energy-density' baseUnit='kilogram-per-meter-square-second' factor='33.705 * 3600 * 1000/gal_to_m3' systems="metric_adjacent ussystem uksystem" description="Constructed so that 1 gallon-gasoline-energy-density = 33.705 kWh as per https://www3.epa.gov/otaq/gvg/learn-more-technology.htm"/>
<convertUnit source='gasoline-energy-density' baseUnit='kilogram-per-meter-square-second' factor='33.705 * 3600 * 1000/gal_to_m3' systems="ussystem uksystem" description="Constructed so that 1 gallon-gasoline-energy-density = 33.705 kWh as per https://www3.epa.gov/otaq/gvg/learn-more-technology.htm"/>

<!-- pressure-per-length -->
<convertUnit source='ofhg' baseUnit='kilogram-per-square-meter-square-second' factor='13595.1*gravity' systems="metric_adjacent uksystem ussystem"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -745,21 +745,19 @@ public static Set<String> getGrammarLocales() {
"month",
"year");

// To see a list of the results for double-checking, run TestUnits with TestUnitsToTranslate -v
static final Set<String> EXCLUDE_GRAMMAR =
Set.of(
"point",
"dunam",
"dot",
"astronomical-unit",
"nautical-mile",
"knot",
"dalton",
"dot", // fallback is pixel
"dot-per-centimeter", // fallback is pixel
"dunam", // language-specific
"astronomical-unit", // specialized
"nautical-mile", // US/UK specific
"knot", // US/UK specific
"dalton", // specialized
"electronvolt", // specialized
"kilocalorie",
"electronvolt",
// The following may be reinstated after 45.
"dot-per-centimeter",
"millimeter-ofhg",
"milligram-ofglucose-per-deciliter");
"point");

public static Set<String> getSpecialsToTranslate() {
return INCLUDE_OTHER;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.impl.Row.R4;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.number.UnlocalizedNumberFormatter;
import com.ibm.icu.text.PluralRules;
Expand All @@ -29,6 +31,7 @@
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
Expand All @@ -48,7 +51,9 @@
import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
import org.unicode.cldr.util.Rational.FormatStyle;
import org.unicode.cldr.util.Rational.RationalParser;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
import org.unicode.cldr.util.Validity.Status;

public class UnitConverter implements Freezable<UnitConverter> {
public static boolean DEBUG = false;
Expand Down Expand Up @@ -80,7 +85,7 @@ public class UnitConverter implements Freezable<UnitConverter> {
private Multimap<String, UnitSystem> sourceToSystems = TreeMultimap.create();
private Set<String> baseUnits;
private Multimap<String, Continuation> continuations = TreeMultimap.create();
private Comparator<String> quantityComparator;
private MapComparator<String> quantityComparator;

private Map<String, String> fixDenormalized;
private ImmutableMap<String, UnitId> idToUnitId;
Expand All @@ -92,6 +97,17 @@ public class UnitConverter implements Freezable<UnitConverter> {

public TargetInfoComparator targetInfoComparator;

// Orderings of long and short unit ids. Both are filled in the iteration order of a TreeSet
// of sort keys (quantity order, grouped unit system, conversion factor, short unit id) and
// are then frozen, when the map comparators are built.
private final MapComparator<String> LongUnitIdOrder = new MapComparator<>();
private final MapComparator<String> ShortUnitIdOrder = new MapComparator<>();

/**
 * Returns the ordering of long unit ids.
 *
 * <p>The ordering is filled when the map comparators are built: each short unit id taken from
 * the sorted sort keys is converted with {@code getLongId} and appended, so long ids follow the
 * same sequence as the corresponding short ids.
 *
 * <p>NOTE(review): {@code setErrorOnMissing(true)} is set on the underlying comparator, so
 * comparing an id that was never added presumably fails; confirm against MapComparator.
 *
 * @return the long-unit-id ordering, exposed as a plain {@link Comparator}
 */
public Comparator<String> getLongUnitIdComparator() {
    return this.LongUnitIdOrder;
}

/**
 * Returns the ordering of short unit ids.
 *
 * <p>The ordering is filled when the map comparators are built: short unit ids are appended in
 * the iteration order of the sorted sort keys (quantity order, grouped unit system, conversion
 * factor, short unit id).
 *
 * <p>NOTE(review): {@code setErrorOnMissing(true)} is set on the underlying comparator, so
 * comparing an id that was never added presumably fails; confirm against MapComparator.
 *
 * @return the short-unit-id ordering, exposed as a plain {@link Comparator}
 */
public Comparator<String> getShortUnitIdComparator() {
    return this.ShortUnitIdOrder;
}

/** Warning: ordering is important; determines the normalized output */
public static final Set<String> BASE_UNITS =
ImmutableSet.of(
Expand Down Expand Up @@ -198,6 +214,74 @@ public UnitConverter freeze() {
}
}
idToUnitId = ImmutableMap.copyOf(_idToUnitId);

// build the map comparators

Set<R4<Integer, UnitSystem, Rational, String>> all = new TreeSet<>();
Set<String> baseSeen = new HashSet<>();
for (String longUnit :
Validity.getInstance().getStatusToCodes(LstrType.unit).get(Status.regular)) {
Output<String> base = new Output<>();
String shortUnit = getShortId(longUnit);
ConversionInfo conversionInfo = parseUnitId(shortUnit, base, false);
if (conversionInfo == null) {
if (longUnit.equals("temperature-generic")) {
conversionInfo = parseUnitId("kelvin", base, false);
}
}
String quantity = getQuantityFromUnit(base.value, false);
Integer quantityNumericOrder = quantityComparator.getNumericOrder(quantity);
if (quantityNumericOrder == null) { // try the inverse
if (base.value.equals("meter-per-cubic-meter")) { // HACK
quantityNumericOrder = quantityComparator.getNumericOrder("consumption");
}
if (quantityNumericOrder == null) {
throw new IllegalArgumentException(
"Missing quantity for: " + base.value + ", " + shortUnit);
}
}

final EnumSet<UnitSystem> systems = EnumSet.copyOf(getSystemsEnum(shortUnit));

// to sort the right items together, put together a sort key
UnitSystem sortingSystem = systems.iterator().next();
switch (sortingSystem) {
case metric:
case si:
case si_acceptable:
case astronomical:
case metric_adjacent:
case person_age:
sortingSystem = UnitSystem.metric;
break;
// country specific
case other:
case ussystem:
case uksystem:
case jpsystem:
sortingSystem = UnitSystem.other;
break;
default:
throw new IllegalArgumentException(
"Add new unitSystem to a grouping: " + sortingSystem);
}
R4<Integer, UnitSystem, Rational, String> sortKey =
Row.of(
quantityNumericOrder,
sortingSystem,
conversionInfo.factor,
shortUnit);
all.add(sortKey);
}
LongUnitIdOrder.setErrorOnMissing(true);
ShortUnitIdOrder.setErrorOnMissing(true);
for (R4<Integer, UnitSystem, Rational, String> item : all) {
String shortId = item.get3();
ShortUnitIdOrder.add(shortId);
LongUnitIdOrder.add(getLongId(shortId));
}
LongUnitIdOrder.freeze();
ShortUnitIdOrder.freeze();
}
return this;
}
Expand Down Expand Up @@ -649,7 +733,7 @@ private void addToSourceToTarget(
}
}

private Comparator<String> getQuantityComparator(
private MapComparator<String> getQuantityComparator(
Map<String, String> baseUnitToQuantity2, Map<String, String> baseUnitToStatus2) {
// We want to sort all the quantities so that we have a natural ordering within compound
// units. So kilowatt-hour, not hour-kilowatt.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3544,7 +3544,22 @@ enum TranslationStatus {
has_grammar_X,
add_grammar,
skip_grammar,
skip_trans
skip_trans("\t— specific langs poss.)");

/** Text returned by {@link #toString()}: the constant's name plus any extra annotation. */
private final String outName;

/** Creates a status whose display text is just the constant's name. */
private TranslationStatus() {
    this("");
}

/**
 * Creates a status whose display text is the constant's name followed by {@code extra}.
 *
 * @param extra text appended verbatim after the constant's name
 */
private TranslationStatus(String extra) {
    this.outName = name() + extra;
}

/**
 * Returns the display text built at construction time rather than the bare constant name.
 *
 * @return the constant's name, followed by the extra annotation if one was supplied
 */
@Override
public String toString() {
    return this.outName;
}
}

/**
Expand All @@ -3555,7 +3570,8 @@ public void TestUnitsToTranslate() {
Set<String> toTranslate = GrammarInfo.getUnitsToAddGrammar();
final CLDRConfig config = CLDRConfig.getInstance();
final UnitConverter converter = config.getSupplementalDataInfo().getUnitConverter();
Map<String, TranslationStatus> shortUnitToTranslationStatus40 = new TreeMap<>();
Map<String, TranslationStatus> shortUnitToTranslationStatus40 =
new TreeMap<>(converter.getShortUnitIdComparator());
for (String longUnit :
Validity.getInstance().getStatusToCodes(LstrType.unit).get(Status.regular)) {
String shortUnit = converter.getShortId(longUnit);
Expand Down Expand Up @@ -3588,9 +3604,9 @@ public void TestUnitsToTranslate() {
TranslationStatus status40 = entry.getValue();
if (isVerbose())
System.out.println(
shortUnit
converter.getQuantityFromUnit(shortUnit, false)
+ "\t"
+ converter.getQuantityFromUnit(shortUnit, false)
+ shortUnit
+ "\t"
+ converter.getSystemsEnum(shortUnit)
+ "\t"
Expand Down

0 comments on commit fb1691a

Please sign in to comment.