Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overflow prevention when handling date values #16812

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Support prefix list for remote repository attributes([#16271](https://github.com/opensearch-project/OpenSearch/pull/16271))
- Add new configuration setting `synonym_analyzer`, to the `synonym` and `synonym_graph` filters, enabling the specification of a custom analyzer for reading the synonym file ([#16488](https://github.com/opensearch-project/OpenSearch/pull/16488)).
- Add stats for remote publication failure and move download failure stats to remote methods([#16682](https://github.com/opensearch-project/OpenSearch/pull/16682/))
- Added a precaution to handle extreme date values during sorting to prevent `arithmetic_exception: long overflow` ([#16812](https://github.com/opensearch-project/OpenSearch/pull/16812)).

### Dependencies
- Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504))
Expand Down
24 changes: 24 additions & 0 deletions server/src/main/java/org/opensearch/common/time/DateUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,30 @@ public static Instant clampToNanosRange(Instant instant) {
return instant;
}

static final Instant INSTANT_LONG_MIN_VALUE = Instant.ofEpochMilli(Long.MIN_VALUE);
static final Instant INSTANT_LONG_MAX_VALUE = Instant.ofEpochMilli(Long.MAX_VALUE);

/**
 * Restricts an {@link Instant} to the range representable as epoch milliseconds in a {@code long}.
 *
 * - Inputs earlier than {@code Instant.ofEpochMilli(Long.MIN_VALUE)} yield that lower bound.
 * - Inputs later than {@code Instant.ofEpochMilli(Long.MAX_VALUE)} yield that upper bound.
 * - Anything already inside the range is returned unchanged.
 *
 * @param instant the {@link Instant} to clamp
 * @return the clamped {@link Instant}
 * @throws NullPointerException if the input is {@code null}
 */
public static Instant clampToMillisRange(Instant instant) {
    // compareTo-based bounds check; a null argument fails here with NPE, as documented.
    if (instant.compareTo(INSTANT_LONG_MIN_VALUE) < 0) {
        return INSTANT_LONG_MIN_VALUE;
    }
    if (instant.compareTo(INSTANT_LONG_MAX_VALUE) > 0) {
        return INSTANT_LONG_MAX_VALUE;
    }
    return instant;
}

/**
* convert a long value to a java time instant
* the long value resembles the nanoseconds since the epoch
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ public enum Resolution {
MILLISECONDS(CONTENT_TYPE, NumericType.DATE) {
@Override
public long convert(Instant instant) {
return instant.toEpochMilli();
return clampToValidRange(instant).toEpochMilli();
}

@Override
Expand All @@ -132,7 +132,7 @@ public Instant toInstant(long value) {

@Override
public Instant clampToValidRange(Instant instant) {
return instant;
return DateUtils.clampToMillisRange(instant);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,4 +260,21 @@ public void testRoundYear() {
long startOf1996 = Year.of(1996).atDay(1).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
assertThat(DateUtils.roundYear(endOf1996), is(startOf1996));
}

public void testClampToMillisRange() {
    // An in-range instant passes through untouched.
    Instant now = Instant.now();
    assertEquals(now, DateUtils.clampToMillisRange(now));

    // Values just outside the representable epoch-millis range are pulled back to the bounds.
    Instant justBelowMin = DateUtils.INSTANT_LONG_MIN_VALUE.minusMillis(1);
    assertEquals(DateUtils.INSTANT_LONG_MIN_VALUE, DateUtils.clampToMillisRange(justBelowMin));

    Instant justAboveMax = DateUtils.INSTANT_LONG_MAX_VALUE.plusMillis(1);
    assertEquals(DateUtils.INSTANT_LONG_MAX_VALUE, DateUtils.clampToMillisRange(justAboveMax));

    // The bounds themselves are fixed points of the clamp.
    assertEquals(DateUtils.INSTANT_LONG_MIN_VALUE, DateUtils.clampToMillisRange(DateUtils.INSTANT_LONG_MIN_VALUE));

    assertEquals(DateUtils.INSTANT_LONG_MAX_VALUE, DateUtils.clampToMillisRange(DateUtils.INSTANT_LONG_MAX_VALUE));

    // Null input propagates the NPE thrown by the Instant comparison.
    assertThrows(NullPointerException.class, () -> DateUtils.clampToMillisRange(null));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ public void testIgnoreMalformedLegacy() throws IOException {
"failed to parse date field [2016-03-99] with format [strict_date_optional_time||epoch_millis]"
);
testIgnoreMalformedForValue("-2147483648", "Invalid value for Year (valid values -999999999 - 999999999): -2147483648");
testIgnoreMalformedForValue("-522000000", "long overflow");
}

public void testIgnoreMalformed() throws IOException {
Expand All @@ -170,7 +169,6 @@ public void testIgnoreMalformed() throws IOException {
"failed to parse date field [2016-03-99] with format [strict_date_time_no_millis||strict_date_optional_time||epoch_millis]"
);
testIgnoreMalformedForValue("-2147483648", "Invalid value for Year (valid values -999999999 - 999999999): -2147483648");
testIgnoreMalformedForValue("-522000000", "long overflow");
}

private void testIgnoreMalformedForValue(String value, String expectedCause) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,32 @@

package org.opensearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
Expand All @@ -71,8 +83,12 @@
import org.joda.time.DateTimeZone;

import java.io.IOException;
import java.time.Instant;
import java.time.ZoneOffset;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

import static org.hamcrest.CoreMatchers.is;
import static org.apache.lucene.document.LongPoint.pack;
Expand Down Expand Up @@ -490,4 +506,187 @@ public void testParseSourceValueNanos() throws IOException {
MappedFieldType nullValueMapper = fieldType(Resolution.NANOSECONDS, "strict_date_time||epoch_millis", nullValueDate);
assertEquals(Collections.singletonList(nullValueDate), fetchSourceValue(nullValueMapper, null));
}

public void testDateResolutionForOverflow() throws IOException {
// Regression test for `arithmetic_exception: long overflow` when handling dates at the
// extreme bounds of the epoch-millis range. "+292278994-08-17T07:12:55.807Z" corresponds
// to Long.MAX_VALUE milliseconds and "-292275055-05-16T16:47:04.192Z" to Long.MIN_VALUE
// milliseconds — TODO confirm against DateUtils.INSTANT_LONG_MIN/MAX_VALUE.
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));

// Millisecond-resolution date field with a multi-pattern formatter.
DateFieldType ft = new DateFieldType(
"test_date",
true,
true,
true,
DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"),
Resolution.MILLISECONDS,
null,
Collections.emptyMap()
);

// Mix of missing (null) dates, ordinary dates, and the two extreme boundary dates.
List<String> dates = Arrays.asList(
null,
"2020-01-01T00:00:00Z",
null,
"2021-01-01T00:00:00Z",
"+292278994-08-17T07:12:55.807Z",
null,
"-292275055-05-16T16:47:04.192Z"
);

int numNullDates = 0;
long minDateValue = Long.MAX_VALUE;
long maxDateValue = Long.MIN_VALUE;

// Index each non-null date as point + doc values + stored field, tracking the observed
// min/max converted values; documents with a null date only carry an "id" stored field.
for (int i = 0; i < dates.size(); i++) {
ParseContext.Document doc = new ParseContext.Document();
String dateStr = dates.get(i);

if (dateStr != null) {
// convert() must not overflow for the boundary dates above.
long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant());
doc.add(new LongPoint(ft.name(), timestamp));
doc.add(new SortedNumericDocValuesField(ft.name(), timestamp));
doc.add(new StoredField(ft.name(), timestamp));
doc.add(new StoredField("id", i));
minDateValue = Math.min(minDateValue, timestamp);
maxDateValue = Math.max(maxDateValue, timestamp);
} else {
numNullDates++;
doc.add(new StoredField("id", i));
}
w.addDocument(doc);
}

DirectoryReader reader = DirectoryReader.open(w);
IndexSearcher searcher = new IndexSearcher(reader);

// Minimal single-shard settings + shard context so DateFieldType.rangeQuery can run;
// most collaborators are unused by the query path exercised here, hence the nulls.
Settings indexSettings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
.build();
QueryShardContext context = new QueryShardContext(
0,
new IndexSettings(IndexMetadata.builder("foo").settings(indexSettings).build(), indexSettings),
BigArrays.NON_RECYCLING_INSTANCE,
null,
null,
null,
null,
null,
xContentRegistry(),
writableRegistry(),
null,
null,
() -> nowInMillis,
null,
null,
() -> true,
null
);

// Range query spanning the entire representable date range; building and running it
// must not throw a long-overflow exception.
Query rangeQuery = ft.rangeQuery(
"-292275055-05-16T16:47:04.192Z",
"+292278994-08-17T07:12:55.807Z",
true,
true,
null,
null,
null,
context
);

TopDocs topDocs = searcher.search(rangeQuery, dates.size());
assertEquals("Number of non-null date documents", dates.size() - numNullDates, topDocs.totalHits.value);

// Every stored date must lie within the min/max recorded at index time.
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc);
IndexableField dateField = doc.getField(ft.name());
if (dateField != null) {
long dateValue = dateField.numericValue().longValue();
assertTrue(
"Date value " + dateValue + " should be within valid range",
dateValue >= minDateValue && dateValue <= maxDateValue
);
}
}

// Same field but configured with a null_value default; a term query for that date must
// still match exactly the one document explicitly indexed with 2020-01-01.
DateFieldType ftWithNullValue = new DateFieldType(
"test_date",
true,
true,
true,
DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"),
Resolution.MILLISECONDS,
"2020-01-01T00:00:00Z",
Collections.emptyMap()
);

Query nullValueQuery = ftWithNullValue.termQuery("2020-01-01T00:00:00Z", context);
topDocs = searcher.search(nullValueQuery, dates.size());
assertEquals("Documents matching the 2020-01-01 date", 1, topDocs.totalHits.value);

IOUtils.close(reader, w, dir);
}

public void testDateFieldTypeWithNulls() throws IOException {
// Exercises a segment where most documents lack the date field (3500 without vs 50 with)
// to verify sorted searches over sparse date doc values complete without error.
DateFieldType ft = new DateFieldType(
"domainAttributes.dueDate",
true,
true,
true,
DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||date_optional_time"),
Resolution.MILLISECONDS,
null,
Collections.emptyMap()
);

Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));

int nullDocs = 3500;
int datedDocs = 50;

// Documents carrying only the keyword field — no date value at all.
for (int i = 0; i < nullDocs; i++) {
ParseContext.Document doc = new ParseContext.Document();
doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES));
w.addDocument(doc);
}

// Documents carrying a March-2022 date; (i % 30) + 1 cycles the day through 01..30.
for (int i = 1; i <= datedDocs; i++) {
ParseContext.Document doc = new ParseContext.Document();
String dateStr = String.format(Locale.ROOT, "2022-03-%02dT15:40:58.324", (i % 30) + 1);
long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant());
doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES));
doc.add(new LongPoint(ft.name(), timestamp));
doc.add(new SortedNumericDocValuesField(ft.name(), timestamp));
doc.add(new StoredField(ft.name(), timestamp));
w.addDocument(doc);
}

DirectoryReader reader = DirectoryReader.open(w);
IndexSearcher searcher = new IndexSearcher(reader);

// Match every document via the shared firmId term.
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
queryBuilder.add(new TermQuery(new Term("domainAttributes.firmId", "12345678910111213")), BooleanClause.Occur.MUST);

// NOTE(review): SortField.Type.DOC sorts by docid and ignores the named field's values,
// so this sort does not actually compare date doc values. Sorting by the date field with
// missing values would need a SortedNumericSortedField(ft.name(), SortField.Type.LONG)
// — confirm which behavior this test intends to cover.
Sort sort = new Sort(new SortField(ft.name(), SortField.Type.DOC, false));

// Repeated identical searches — presumably to surface nondeterministic failures;
// TODO confirm whether a single iteration would suffice.
for (int i = 0; i < 100; i++) {
TopDocs topDocs = searcher.search(queryBuilder.build(), nullDocs + datedDocs, sort);
assertEquals("Total hits should match total documents", nullDocs + datedDocs, topDocs.totalHits.value);
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc);
IndexableField dateField = doc.getField(ft.name());
if (dateField != null) {
long dateValue = dateField.numericValue().longValue();
Instant dateInstant = Instant.ofEpochMilli(dateValue);
assertTrue(
"Date should be in March 2022",
dateInstant.isAfter(Instant.parse("2022-03-01T00:00:00Z"))
&& dateInstant.isBefore(Instant.parse("2022-04-01T00:00:00Z"))
);
}
}
}
IOUtils.close(reader, w, dir);
}
}
Loading