Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

XContentParser shouldn't lose data from floating-point numbers. #46531

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/46531.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 46531
summary: XContentParser shouldn't lose data from floating-point numbers
area: Infra/Core
type: bug
issues:
- 46261
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ And the following may be the response:
]
},
"avg_monthly_sales": {
"value": 328.33333333333333
"value": 328.3333333333333
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note to reviewers: this only passed before because parsing was lossy

}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@ default void copyCurrentEvent(XContentParser parser) throws IOException {
case DOUBLE:
writeNumber(parser.doubleValue());
break;
case BIG_INTEGER:
writeNumber((BigInteger) parser.numberValue());
break;
case BIG_DECIMAL:
writeNumber((BigDecimal) parser.numberValue());
break;
default:
throw new UnsupportedOperationException("Unknown number type: " + parser.numberType());
}
break;
case VALUE_BOOLEAN:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -414,11 +414,10 @@ public void copyCurrentStructure(XContentParser parser) throws IOException {
if (parser.currentToken() == null) {
parser.nextToken();
}
if (parser instanceof JsonXContentParser) {
generator.copyCurrentStructure(((JsonXContentParser) parser).parser);
} else {
copyCurrentStructure(this, parser);
}
// We don't optimize by forwarding to the wrapped generator due to
// information loss with floating-point numbers that can't be
// represented as a double.
copyCurrentStructure(this, parser);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.elasticsearch.core.internal.io.IOUtils;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.CharBuffer;

public class JsonXContentParser extends AbstractXContentParser {
Expand Down Expand Up @@ -148,7 +149,11 @@ public int textOffset() throws IOException {

@Override
public Number numberValue() throws IOException {
return parser.getNumberValue();
Number number = parser.getNumberValue();
if (number instanceof Double && isDouble(textCharacters(), textOffset(), textLength()) == false) {
number = parser.getDecimalValue();
}
return number;
}

@Override
Expand Down Expand Up @@ -206,7 +211,15 @@ private NumberType convertNumberType(JsonParser.NumberType numberType) {
case FLOAT:
return NumberType.FLOAT;
case DOUBLE:
return NumberType.DOUBLE;
try {
if (isDouble(parser.getTextCharacters(), parser.getTextOffset(), parser.getTextLength())) {
return NumberType.DOUBLE;
} else {
return NumberType.BIG_DECIMAL;
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
case BIG_DECIMAL:
return NumberType.BIG_DECIMAL;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Supplier;

public abstract class AbstractXContentParser implements XContentParser {
Expand All @@ -55,6 +56,86 @@ private static void checkCoerceString(boolean coerce, Class<? extends Number> cl
}
}

/**
* If the provided char sequence represents a number, then this method
* returns true if, and only if the given char buffer represents a
* non-finite double, or there exists a double whose string representation
* is mathematically equal to the number represented in the provided char
* sequence. The behavior is undefined if the given char buffer doesn't
* actually represent a number.
*/
protected static boolean isDouble(char[] chars, int charsOff, int charsLen) {
Objects.checkFromIndexSize(charsOff, charsLen, chars.length);
if (charsLen <= 17) { // 15 significant digits, plus '.' and '-'
// Avoid numbers that use a scientific notation because they might
// be short yet have an exponent that is greater than the maximum
// double exponent, eg. 9E999.
boolean scientificNotation = false;
int numSigDigits = 0;
for (int i = charsOff, end = charsOff + charsLen; i < end; ++i) {
char c = chars[i];
if (c >= '0' && c <= '9') {
numSigDigits++;
} else if (c != '-' && c != '.') {
scientificNotation = true;
break;
}
}
if (scientificNotation == false && numSigDigits <= 15) { // Fast path
// Doubles have 53 bits of mantissa including the implicit bit.
// If a String with 15 significant digits or less was not the
// string representation of a double, it would mean that two
// consecutive doubles would differ (relatively) by more than
// 10^-15, which is impossible since 10^-15 > 2^-53.
return true;
}
}
return slowIsDouble(chars, charsOff, charsLen);
}

private static final int MAX_DOUBLE_BASE10_EXPONENT = getBase10Exponent(BigDecimal.valueOf(Double.MAX_VALUE));
private static final int MIN_NORMAL_DOUBLE_BASE10_EXPONENT = getBase10Exponent(BigDecimal.valueOf(Double.MIN_NORMAL));

// pkg-private for testing
static boolean slowIsDouble(char[] chars, int charsOff, int charsLen) {
try {
BigDecimal bigDec = new BigDecimal(chars, charsOff, charsLen);
if (bigDec.precision() <= 15) { // See comment in #isDouble
final int base10Exponent = getBase10Exponent(bigDec);
if (base10Exponent < MAX_DOUBLE_BASE10_EXPONENT &&
base10Exponent > MIN_NORMAL_DOUBLE_BASE10_EXPONENT) {
return true;
}
}
return slowIsDouble(bigDec);
} catch (NumberFormatException e) {
// We need to return true for NaN and +/-Infinity
// For malformed strings, the return value is undefined, so true is fine too.
return true;
}
}

/**
* Return the exponent of {@code bigDec} in its scientific base 10 representation.
*/
static int getBase10Exponent(BigDecimal bigDec) {
// A bigdecimal is equal to unscaledValue*10^-scale and the
// unscaled value can be written as C * 10 ^(precision-1) where
// C is in [1,10). So the bigdecimal is equal to
// C * 10^(precision-scale-1)
return bigDec.precision() - bigDec.scale() - 1;
}

private static boolean slowIsDouble(BigDecimal bigDec) {
double asDouble = bigDec.doubleValue();
if (Double.isFinite(asDouble) == false) {
return false;
}
// Don't use equals since it returns false for decimals that have the
// same value but different scales.
return bigDec.compareTo(new BigDecimal(Double.toString(asDouble))) == 0;
}

private final NamedXContentRegistry xContentRegistry;
private final DeprecationHandler deprecationHandler;

Expand Down Expand Up @@ -397,4 +478,5 @@ public NamedXContentRegistry getXContentRegistry() {
public DeprecationHandler getDeprecationHandler() {
return deprecationHandler;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,18 @@
package org.elasticsearch.common.xcontent;

import com.fasterxml.jackson.core.JsonParseException;

import org.elasticsearch.common.CheckedSupplier;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.test.ESTestCase;
import org.hamcrest.Matchers;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
Expand Down Expand Up @@ -90,6 +95,140 @@ public void testFloat() throws IOException {
}
}

public void testDetectBigIntegerJSON() throws IOException {
doTestDetectBigInteger(XContentType.JSON);
}

public void testDetectBigIntegerCBOR() throws IOException {
doTestDetectBigInteger(XContentType.CBOR);
}

public void testDetectBigIntegerSmile() throws IOException {
doTestDetectBigInteger(XContentType.SMILE);
}

public void testDetectBigIntegerYaml() throws IOException {
doTestDetectBigInteger(XContentType.YAML);
}

private void doTestDetectBigInteger(XContentType xcontentType) throws IOException {
BytesStreamOutput out = new BytesStreamOutput();
XContentGenerator generator = xcontentType.xContent().createGenerator(out);
generator.writeStartObject();
generator.writeFieldName("foo");
BigInteger bigInt = new BigInteger("9999999999999999999999999999999");
assertThat(bigInt, Matchers.greaterThan(BigInteger.valueOf(Long.MAX_VALUE)));
generator.writeNumber(bigInt);
generator.writeEndObject();
generator.flush();

XContentParser parser = xcontentType.xContent().createParser(NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, out.bytes().streamInput());
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
assertEquals("foo", parser.currentName());
assertEquals(XContentParser.Token.VALUE_NUMBER, parser.nextToken());
assertEquals(XContentParser.NumberType.BIG_INTEGER, parser.numberType());
assertEquals(bigInt, parser.numberValue());
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
assertNull(parser.nextToken());

parser = xcontentType.xContent().createParser(NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, out.bytes().streamInput());
Map<String, Object> map = parser.map();
assertEquals(Collections.singletonMap("foo", new BigInteger("9999999999999999999999999999999")), map);

parser = xcontentType.xContent().createParser(NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, out.bytes().streamInput());
out = new BytesStreamOutput();
generator = xcontentType.xContent().createGenerator(out);
generator.copyCurrentStructure(parser);
generator.flush();
parser = xcontentType.xContent().createParser(NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, out.bytes().streamInput());
map = parser.map();
assertEquals(Collections.singletonMap("foo", new BigInteger("9999999999999999999999999999999")), map);

parser = xcontentType.xContent().createParser(NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, out.bytes().streamInput());
out = new BytesStreamOutput();
// filtering triggers different logic
generator = xcontentType.xContent().createGenerator(out, Collections.emptySet(), Collections.singleton("bar"));
generator.copyCurrentStructure(parser);
generator.flush();
parser = xcontentType.xContent().createParser(NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, out.bytes().streamInput());
map = parser.map();
assertEquals(Collections.singletonMap("foo", new BigInteger("9999999999999999999999999999999")), map);
}

public void testDetectBigDecimalJSON() throws IOException {
doTestDetectBigDecimal(XContentType.JSON);
}

public void testDetectBigDecimalCBOR() throws IOException {
doTestDetectBigDecimal(XContentType.CBOR);
}

public void testDetectBigDecimalSmile() throws IOException {
doTestDetectBigDecimal(XContentType.SMILE);
}

public void testDetectBigDecimalYaml() throws IOException {
doTestDetectBigDecimal(XContentType.YAML);
}

private void doTestDetectBigDecimal(XContentType xcontentType) throws IOException {
BytesStreamOutput out = new BytesStreamOutput();
XContentGenerator generator = xcontentType.xContent().createGenerator(out);
generator.writeStartObject();
generator.writeFieldName("foo");
BigDecimal bigDec = new BigDecimal("4.000000000000000000000000002");
generator.writeNumber(bigDec);
generator.writeEndObject();
generator.flush();

XContentParser parser = xcontentType.xContent().createParser(NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, out.bytes().streamInput());
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
assertEquals("foo", parser.currentName());
assertEquals(XContentParser.Token.VALUE_NUMBER, parser.nextToken());
assertEquals(XContentParser.NumberType.BIG_DECIMAL, parser.numberType());
assertEquals(bigDec, parser.numberValue());
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
assertNull(parser.nextToken());

// parser.map()
parser = xcontentType.xContent().createParser(NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, out.bytes().streamInput());
Map<String, Object> map = parser.map();
assertEquals(Collections.singletonMap("foo", new BigDecimal("4.000000000000000000000000002")), map);

parser = xcontentType.xContent().createParser(NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, out.bytes().streamInput());
out = new BytesStreamOutput();
generator = xcontentType.xContent().createGenerator(out);
generator.copyCurrentStructure(parser);
generator.flush();
parser = xcontentType.xContent().createParser(NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, out.bytes().streamInput());
map = parser.map();
assertEquals(Collections.singletonMap("foo", new BigDecimal("4.000000000000000000000000002")), map);

parser = xcontentType.xContent().createParser(NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, out.bytes().streamInput());
out = new BytesStreamOutput();
// filtering triggers different logic
generator = xcontentType.xContent().createGenerator(out, Collections.emptySet(), Collections.singleton("bar"));
generator.copyCurrentStructure(parser);
generator.flush();
parser = xcontentType.xContent().createParser(NamedXContentRegistry.EMPTY,
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, out.bytes().streamInput());
map = parser.map();
assertEquals(Collections.singletonMap("foo", new BigDecimal("4.000000000000000000000000002")), map);
}

public void testReadList() throws IOException {
assertThat(readList("{\"foo\": [\"bar\"]}"), contains("bar"));
assertThat(readList("{\"foo\": [\"bar\",\"baz\"]}"), contains("bar", "baz"));
Expand Down
Loading