Skip to content

Commit

Permalink
8331485: Odd Results when Parsing Scientific Notation with Large Expo…
Browse files Browse the repository at this point in the history
…nent

8331680: NumberFormat is missing some bad exponent strict parse cases

Reviewed-by: naoto
  • Loading branch information
Justin Lu committed May 30, 2024
1 parent 79f4998 commit ffb0867
Show file tree
Hide file tree
Showing 4 changed files with 267 additions and 15 deletions.
60 changes: 47 additions & 13 deletions src/java.base/share/classes/java/text/DecimalFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -2479,9 +2479,12 @@ int subparseNumber(String text, int position,
symbols.getGroupingSeparator();
String exponentString = symbols.getExponentSeparator();
boolean sawDecimal = false;
boolean sawExponent = false;
boolean sawDigit = false;
int exponent = 0; // Set to the exponent value, if any
// Storing as long allows us to maintain accuracy of exponent
// when the exponent value as well as the decimalAt nears
// Integer.MAX/MIN value. However, the final expressed value is an int
long exponent = 0;
boolean[] expStat = new boolean[STATUS_LENGTH];

// We have to track digitCount ourselves, because digits.count will
// pin when the maximum allowable digits is reached.
Expand Down Expand Up @@ -2579,21 +2582,27 @@ int subparseNumber(String text, int position,
// require that they be followed by a digit. Otherwise
// we backup and reprocess them.
backup = position;
} else if (checkExponent && !isExponent && text.regionMatches(position, exponentString, 0, exponentString.length())
&& !sawExponent) {
} else if (checkExponent && !isExponent
&& text.regionMatches(position, exponentString, 0, exponentString.length())) {
// Process the exponent by recursively calling this method.
ParsePosition pos = new ParsePosition(position + exponentString.length());
boolean[] stat = new boolean[STATUS_LENGTH];
DigitList exponentDigits = new DigitList();

if (subparse(text, pos, "", symbols.getMinusSignText(), exponentDigits, true, stat) &&
exponentDigits.fitsIntoLong(stat[STATUS_POSITIVE], true)) {
position = pos.index; // Advance past the exponent
exponent = (int)exponentDigits.getLong();
if (!stat[STATUS_POSITIVE]) {
exponent = -exponent;
if (subparse(text, pos, "", symbols.getMinusSignText(), exponentDigits, true, expStat)) {
// We parse the exponent with isExponent == true, thus fitsIntoLong()
// only returns false here if the exponent DigitList value exceeds
// Long.MAX_VALUE. We do not need to worry about false being
// returned for faulty values as they are ignored by DigitList.
if (exponentDigits.fitsIntoLong(expStat[STATUS_POSITIVE], true)) {
exponent = exponentDigits.getLong();
if (!expStat[STATUS_POSITIVE]) {
exponent = -exponent;
}
} else {
exponent = expStat[STATUS_POSITIVE] ?
Long.MAX_VALUE : Long.MIN_VALUE;
}
sawExponent = true;
position = pos.index; // Advance past the exponent
}
break; // Whether we fail or succeed, we exit this loop
} else {
Expand Down Expand Up @@ -2628,7 +2637,9 @@ int subparseNumber(String text, int position,
}

// Adjust for exponent, if any
digits.decimalAt += exponent;
if (exponent != 0) {
digits.decimalAt = shiftDecimalAt(digits.decimalAt, exponent);
}

// If none of the text string was recognized. For example, parse
// "x" with pattern "#0.00" (return index and error index both 0)
Expand All @@ -2641,6 +2652,29 @@ int subparseNumber(String text, int position,
return position;
}

// Calculate the final decimal position based off the exponent value
// and the existing decimalAt position. If overflow/underflow, the value
// should be set as either Integer.MAX/MIN
private int shiftDecimalAt(int decimalAt, long exponent) {
try {
exponent = Math.addExact(decimalAt, exponent);
} catch (ArithmeticException ex) {
// If we under/overflow a Long do not bother with the decimalAt
// As it can only shift up to Integer.MAX/MIN which has no affect
if (exponent > 0 && decimalAt > 0) {
return Integer.MAX_VALUE;
} else {
return Integer.MIN_VALUE;
}
}
try {
decimalAt = Math.toIntExact(exponent);
} catch (ArithmeticException ex) {
decimalAt = exponent > 0 ? Integer.MAX_VALUE : Integer.MIN_VALUE;
}
return decimalAt;
}

// Checks to make sure grouping size is not violated. Used when strict.
private boolean isGroupingViolation(int pos, int prevGroupingPos) {
assert parseStrict : "Grouping violations should only occur when strict";
Expand Down
193 changes: 193 additions & 0 deletions test/jdk/java/text/Format/DecimalFormat/LargeExponentsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

/*
* @test
* @bug 8331485
* @summary Ensure correctness when parsing large (+/-) exponent values that
* exceed Integer.MAX_VALUE and Long.MAX_VALUE.
* @run junit/othervm --add-opens java.base/java.text=ALL-UNNAMED LargeExponentsTest
*/

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.stream.Stream;

import static org.junit.jupiter.api.Assertions.assertEquals;

// We prevent odd results when parsing large exponent values by ensuring
// that we properly handle overflow in the implementation of DigitList
public class LargeExponentsTest {

// Exponent symbol is 'E'
private static final NumberFormat FMT = NumberFormat.getInstance(Locale.US);

// Check that the parsed value and parse position index are both equal to the expected values.
// We are mainly checking that an exponent > Integer.MAX_VALUE no longer
// parses to 0 and that an exponent > Long.MAX_VALUE no longer parses to the mantissa.
@ParameterizedTest
@MethodSource({"largeExponentValues", "smallExponentValues", "bugReportValues", "edgeCases"})
public void overflowTest(String parseString, Double expectedValue) throws ParseException {
checkParse(parseString, expectedValue);
checkParseWithPP(parseString, expectedValue);
}

// A separate white-box test to avoid the memory consumption of testing cases
// when the String is near Integer.MAX_LENGTH
@ParameterizedTest
@MethodSource
public void largeDecimalAtExponentTest(int expected, int decimalAt, long expVal)
throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
DecimalFormat df = new DecimalFormat();
Method m = df.getClass().getDeclaredMethod(
"shiftDecimalAt", int.class, long.class);
m.setAccessible(true);
assertEquals(expected, m.invoke(df, decimalAt, expVal));
}

// Cases where we can test behavior when the String is near Integer.MAX_LENGTH
private static Stream<Arguments> largeDecimalAtExponentTest() {
return Stream.of(
// Equivalent to testing Arguments.of("0."+"0".repeat(Integer.MAX_VALUE-20)+"1"+"E2147483650", 1.0E22)
// This is an absurdly long decimal string with length close to Integer.MAX_VALUE
// where the decimal position correctly negates the exponent value, even if it exceeds Integer.MAX_VALUE
Arguments.of(23, -(Integer.MAX_VALUE-20), 3L+Integer.MAX_VALUE),
Arguments.of(-23, Integer.MAX_VALUE-20, -(3L+Integer.MAX_VALUE)),
Arguments.of(Integer.MIN_VALUE, -(Integer.MAX_VALUE-20), -(3L+Integer.MAX_VALUE)),
Arguments.of(Integer.MAX_VALUE, Integer.MAX_VALUE-20, 3L+Integer.MAX_VALUE),
Arguments.of(Integer.MAX_VALUE, -(Integer.MAX_VALUE-20), Long.MAX_VALUE),
Arguments.of(Integer.MIN_VALUE, Integer.MAX_VALUE-20, Long.MIN_VALUE)
);
}

// Checks the parse(String, ParsePosition) method
public void checkParse(String parseString, Double expectedValue) {
ParsePosition pp = new ParsePosition(0);
Number actualValue = FMT.parse(parseString, pp);
assertEquals(expectedValue, (double)actualValue);
assertEquals(parseString.length(), pp.getIndex());
}

// Checks the parse(String) method
public void checkParseWithPP(String parseString, Double expectedValue)
throws ParseException {
Number actualValue = FMT.parse(parseString);
assertEquals(expectedValue, (double)actualValue);
}

// Generate large enough exponents that should all be parsed as infinity
// when positive. This includes exponents that exceed Long.MAX_VALUE
private static List<Arguments> largeExponentValues() {
return createExponentValues(false);
}

// Same as previous provider but for negative exponent values, so expecting
// a parsed value of 0.
private static List<Arguments> smallExponentValues() {
return createExponentValues(true);
}

// Programmatically generate some large parse values that are expected
// to be parsed as infinity or 0
private static List<Arguments> createExponentValues(boolean negative) {
List<Arguments> args = new ArrayList<>();
// Start with a base value that should be parsed as infinity
String baseValue = "12234.123E1100";
// Continuously add to the String until we trigger the overflow condition
for (int i = 0; i < 100; i++) {
StringBuilder bldr = new StringBuilder();
// Add to exponent
bldr.append(baseValue).append("1".repeat(i));
// Add to mantissa
bldr.insert(0, "1".repeat(i));
args.add(Arguments.of(
// Prepend "-" to exponent if negative
negative ? bldr.insert(bldr.indexOf("E")+1, "-").toString() : bldr.toString(),
// Expect 0 if negative, else infinity
negative ? 0.0 : Double.POSITIVE_INFINITY));
}
return args;
}

// The provided values are all from the JBS issue
// These contain exponents that exceed Integer.MAX_VALUE, but not Long.MAX_VALUE
private static Stream<Arguments> bugReportValues() {
return Stream.of(
Arguments.of("0.123E1", 1.23),
Arguments.of("0.123E309", 1.23E308),
Arguments.of("0.123E310", Double.POSITIVE_INFINITY),
Arguments.of("0.123E2147483647", Double.POSITIVE_INFINITY),
Arguments.of("0.123E2147483648", Double.POSITIVE_INFINITY),
Arguments.of("0.0123E2147483648", Double.POSITIVE_INFINITY),
Arguments.of("0.0123E2147483649", Double.POSITIVE_INFINITY),
Arguments.of("1.23E2147483646", Double.POSITIVE_INFINITY),
Arguments.of("1.23E2147483647", Double.POSITIVE_INFINITY),
Arguments.of("0.123E4294967296", Double.POSITIVE_INFINITY),
Arguments.of("0.123E-322", 9.9E-324),
Arguments.of("0.123E-323", 0.0),
Arguments.of("0.123E-2147483647", 0.0),
Arguments.of("0.123E-2147483648", 0.0),
Arguments.of("0.123E-2147483649", 0.0)
);
}

// Some other edge case values to ensure parse correctness
private static Stream<Arguments> edgeCases() {
return Stream.of(
// Exponent itself does not cause underflow, but decimalAt adjustment
// based off mantissa should. decimalAt(-1) + exponent(Integer.MIN_VALUE) = underflow
Arguments.of("0.0123E-2147483648", 0.0),
// 0 exponent
Arguments.of("1.23E0", 1.23),
// Leading zeroes
Arguments.of("1.23E0000123", 1.23E123),
// Leading zeroes - Past Long.MAX_VALUE length
Arguments.of("1.23E00000000000000000000000000000000000000000123", 1.23E123),
// Trailing zeroes
Arguments.of("1.23E100", 1.23E100),
// Long.MAX_VALUE length
Arguments.of("1.23E1234567891234567800", Double.POSITIVE_INFINITY),
// Long.MAX_VALUE with trailing zeroes
Arguments.of("1.23E9223372036854775807000", Double.POSITIVE_INFINITY),
// Long.MIN_VALUE
Arguments.of("1.23E-9223372036854775808", 0.0),
// Exponent value smaller than Long.MIN_VALUE
Arguments.of("1.23E-9223372036854775809", 0.0),
// Exponent value equal to Long.MAX_VALUE
Arguments.of("1.23E9223372036854775807", Double.POSITIVE_INFINITY),
// Exponent value larger than Long.MAX_VALUE
Arguments.of("1.23E9223372036854775808", Double.POSITIVE_INFINITY)
);
}
}
15 changes: 14 additions & 1 deletion test/jdk/java/text/Format/NumberFormat/LenientParseTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

/*
* @test
* @bug 8327640
* @bug 8327640 8331485
* @summary Test suite for NumberFormat parsing when lenient.
* @run junit/othervm -Duser.language=en -Duser.country=US LenientParseTest
* @run junit/othervm -Duser.language=ja -Duser.country=JP LenientParseTest
Expand All @@ -34,6 +34,7 @@
* @run junit/othervm -Duser.language=ar -Duser.country=AR LenientParseTest
*/

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
Expand Down Expand Up @@ -127,6 +128,18 @@ public void numFmtStrictIntegerOnlyUsed(String toParse, int expectedValue, int e
dFmt.setParseIntegerOnly(false);
}

@Test // Non-localized, only run once
@EnabledIfSystemProperty(named = "user.language", matches = "en")
public void badExponentParseNumberFormatTest() {
// Some fmt, with an "E" exponent string
DecimalFormat fmt = (DecimalFormat) NumberFormat.getNumberInstance(Locale.US);
// Upon non-numeric in exponent, parse will still successfully complete
// but index should end on the last valid char in exponent
assertEquals(1.23E45, successParse(fmt, "1.23E45.123", 7));
assertEquals(1.23E45, successParse(fmt, "1.23E45.", 7));
assertEquals(1.23E45, successParse(fmt, "1.23E45FOO3222", 7));
}

// ---- CurrencyFormat tests ----
// All input Strings should pass and return expected value.
@ParameterizedTest
Expand Down
14 changes: 13 additions & 1 deletion test/jdk/java/text/Format/NumberFormat/StrictParseTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

/*
* @test
* @bug 8327640
* @bug 8327640 8331485
* @summary Test suite for NumberFormat parsing with strict leniency
* @run junit/othervm -Duser.language=en -Duser.country=US StrictParseTest
* @run junit/othervm -Duser.language=ja -Duser.country=JP StrictParseTest
Expand Down Expand Up @@ -112,7 +112,19 @@ public void uniqueCaseNumberFormatTest() {
successParse(nonLocalizedDFmt, "a12345,67890b");
successParse(nonLocalizedDFmt, "a1234,67890b");
failParse(nonLocalizedDFmt, "a123456,7890b", 6);
}

@Test // Non-localized, only run once
@EnabledIfSystemProperty(named = "user.language", matches = "en")
public void badExponentParseNumberFormatTest() {
// Some fmt, with an "E" exponent string
DecimalFormat fmt = (DecimalFormat) NumberFormat.getNumberInstance(Locale.US);
fmt.setStrict(true);
// Upon non-numeric in exponent, parse will exit early and suffix will not
// exactly match, causing failure
failParse(fmt, "1.23E45.1", 7);
failParse(fmt, "1.23E45.", 7);
failParse(fmt, "1.23E45FOO3222", 7);
}

// All input Strings should fail
Expand Down

1 comment on commit ffb0867

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.