Fix a DST error in date_histogram (backport #52016) (#52237)
When `date_histogram` attempts to optimize itself for a particular
time zone it checks to see if the entire shard is within the same
"transition". Most time zones transition once every six months or
thereabouts, so the optimization usually kicks in.
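Not part of the patch itself, but to make the failure mode concrete: below is a minimal sketch, using only the standard `java.time` API, of looking up the transitions that surround an instant for a given zone. `America/Phoenix` (the zone used by the new test in this commit) is assumed here because its most recent DST transition predates the epoch.

```java
import java.time.Instant;
import java.time.ZoneId;
import java.time.zone.ZoneOffsetTransition;
import java.time.zone.ZoneRules;

public class TransitionLookup {
    public static void main(String[] args) {
        ZoneRules rules = ZoneId.of("America/Phoenix").getRules();
        Instant now = Instant.parse("2016-01-01T00:00:00Z");

        ZoneOffsetTransition previous = rules.previousTransition(now);
        ZoneOffsetTransition next = rules.nextTransition(now);

        if (previous != null) {
            // Phoenix stopped observing DST before 1970, so this millisecond
            // value is negative -- exactly the kind of value the old
            // String round trip could not parse.
            System.out.println("previous transition millis: " + previous.getInstant().toEpochMilli());
        }
        System.out.println("next transition: " + next); // null here: no future transitions scheduled
    }
}
```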

*But* it crashes when you attempt to feed it a time zone whose last DST
transition was before the epoch. The reason for this is a little twisted:
before this patch it'd find the next and previous transitions in
milliseconds since the epoch. Then it'd cast them to `Long`s and pass them
into the `DateFieldType` to check if the shard's contents were within
the range. The trouble is they are then converted to `String`s which are
*then* parsed back to `Instant`s which are then converted to `long`s. And
the parser doesn't like most negative numbers. And everything before
the epoch is negative.

This change removes the
`long` -> `Long` -> `String` -> `Instant` -> `long` chain in favor of
a direct `long` -> `Instant` -> `long` conversion, which avoids the fairly
complex parsing code and handles a bunch of interesting edge cases around
the epoch, as well as other edge cases around `date_nanos`.
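As a rough, simplified illustration of that new path (not the actual Elasticsearch code): the sketch below mirrors the `DateUtils.clampToNanosRange` and `toLong` logic added in this commit, clamping an out-of-range instant before converting it to a nanosecond `long`. The example millisecond value is invented for illustration.

```java
import java.time.Instant;

public class ClampThenConvert {
    // Largest instant representable as nanoseconds-since-epoch in a signed long
    // (same value as DateUtils.MAX_NANOSECOND_INSTANT in the patch).
    static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");

    // Simplified version of DateUtils.clampToNanosRange(Instant).
    static Instant clampToNanosRange(Instant instant) {
        if (instant.isBefore(Instant.EPOCH)) {
            return Instant.EPOCH;
        }
        if (instant.isAfter(MAX_NANOSECOND_INSTANT)) {
            return MAX_NANOSECOND_INSTANT;
        }
        return instant;
    }

    // Simplified version of DateUtils.toLong(Instant): nanoseconds since the epoch.
    static long toNanos(Instant instant) {
        return instant.getEpochSecond() * 1_000_000_000 + instant.getNano();
    }

    public static void main(String[] args) {
        // A made-up pre-epoch transition boundary in milliseconds since the epoch.
        Instant preEpoch = Instant.ofEpochMilli(-68_659_200_000L);
        // long (millis) -> Instant -> clamp -> long (nanos): no String parsing anywhere.
        long nanos = toNanos(clampToNanosRange(preEpoch));
        System.out.println(nanos); // 0 -- clamped up to the epoch, the nanosecond minimum
    }
}
```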

Closes #50265
nik9000 authored Feb 12, 2020
1 parent 12cb6dc commit 7efce22
Showing 7 changed files with 368 additions and 120 deletions.
@@ -6,11 +6,11 @@ setup:
settings:
number_of_replicas: 0
mappings:
"properties":
"number":
"type" : "integer"
"date":
"type" : "date"
properties:
number:
type: integer
date:
type: date
- do:
cluster.health:
wait_for_status: green
@@ -214,7 +214,10 @@ setup:
mappings:
properties:
date:
type : date
type: date
fields:
nanos:
type: date_nanos

- do:
bulk:
@@ -239,7 +242,24 @@ setup:
date_histogram:
field: date
calendar_interval: month
- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 2 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 1 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date.nanos
calendar_interval: month
- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
@@ -410,3 +430,63 @@ setup:
- match: { aggregations.histo.buckets.1.doc_count: 2 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }

---
"date_histogram with pre-epoch daylight savings time transition":
- skip:
version: " - 7.6.99"
reason: bug fixed in 7.7.0. will be backported to 7.6.1
# Add date_nanos to the mapping. We couldn't do it during setup because that
# is run against 6.8 which doesn't have date_nanos
- do:
indices.put_mapping:
index: test_1
body:
properties:
number:
type: integer
date:
type: date
fields:
nanos:
type: date_nanos

- do:
bulk:
index: test_1
refresh: true
body:
- '{"index": {}}'
- '{"date": "2016-01-01"}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date
fixed_interval: 1ms
time_zone: America/Phoenix

- match: { hits.total.value: 1 }
- length: { aggregations.histo.buckets: 1 }
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-31T17:00:00.000-07:00" }
- match: { aggregations.histo.buckets.0.doc_count: 1 }

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date.nanos
fixed_interval: 1ms
time_zone: America/Phoenix

- match: { hits.total.value: 1 }
- length: { aggregations.histo.buckets: 1 }
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-31T17:00:00.000-07:00" }
- match: { aggregations.histo.buckets.0.doc_count: 1 }
@@ -208,7 +208,7 @@ public static ZoneId of(String zoneId) {
return ZoneId.of(zoneId).normalized();
}

private static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");
static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");

static final long MAX_NANOSECOND_IN_MILLIS = MAX_NANOSECOND_INSTANT.toEpochMilli();

@@ -231,6 +231,26 @@ public static long toLong(Instant instant) {
return instant.getEpochSecond() * 1_000_000_000 + instant.getNano();
}

/**
* Returns an instant with valid nanosecond resolution. If
* the parameter is before the valid nanosecond range then this returns
* the minimum {@linkplain Instant} valid for nanosecond resolution. If
* the parameter is after the valid nanosecond range then this returns
* the maximum {@linkplain Instant} valid for nanosecond resolution.
* <p>
* Useful for checking if all values for the field are within some range,
* even if the range's endpoints are not valid nanosecond resolution.
*/
public static Instant clampToNanosRange(Instant instant) {
if (instant.isBefore(Instant.EPOCH)) {
return Instant.EPOCH;
}
if (instant.isAfter(MAX_NANOSECOND_INSTANT)) {
return MAX_NANOSECOND_INSTANT;
}
return instant;
}

/**
* convert a long value to a java time instant
* the long value resembles the nanoseconds since the epoch
@@ -88,6 +88,11 @@ public long convert(Instant instant) {
public Instant toInstant(long value) {
return Instant.ofEpochMilli(value);
}

@Override
public Instant clampToValidRange(Instant instant) {
return instant;
}
},
NANOSECONDS("date_nanos", NumericType.DATE_NANOSECONDS) {
@Override
@@ -99,6 +104,11 @@ public long convert(Instant instant) {
public Instant toInstant(long value) {
return DateUtils.toInstant(value);
}

@Override
public Instant clampToValidRange(Instant instant) {
return DateUtils.clampToNanosRange(instant);
}
};

private final String type;
@@ -117,10 +127,18 @@ NumericType numericType() {
return numericType;
}

/**
* Convert an {@linkplain Instant} into a long value in this resolution.
*/
public abstract long convert(Instant instant);

/**
* Convert a long value in this resolution into an instant.
*/
public abstract Instant toInstant(long value);

public abstract Instant clampToValidRange(Instant instant);

public static Resolution ofOrdinal(int ord) {
for (Resolution resolution : values()) {
if (ord == resolution.ordinal()) {
@@ -440,9 +458,30 @@ public Relation isFieldWithinQuery(IndexReader reader,
}
}

// This check needs to be done after fromInclusive and toInclusive
// are resolved so we can throw an exception if they are invalid
// even if there are no points in the shard
return isFieldWithinRange(reader, fromInclusive, toInclusive);
}

/**
* Return whether all values of the given {@link IndexReader} are within the range,
* outside the range or cross the range. Unlike {@link #isFieldWithinQuery} this
* accepts values that are out of the range of the {@link #resolution} of this field.
* @param fromInclusive start date, inclusive
* @param toInclusive end date, inclusive
*/
public Relation isFieldWithinRange(IndexReader reader, Instant fromInclusive, Instant toInclusive)
throws IOException {
return isFieldWithinRange(reader,
resolution.convert(resolution.clampToValidRange(fromInclusive)),
resolution.convert(resolution.clampToValidRange(toInclusive)));
}

/**
* Return whether all values of the given {@link IndexReader} are within the range,
* outside the range or cross the range.
* @param fromInclusive start date, inclusive, {@link Resolution#convert(Instant) converted} to the appropriate scale
* @param toInclusive end date, inclusive, {@link Resolution#convert(Instant) converted} to the appropriate scale
*/
private Relation isFieldWithinRange(IndexReader reader, long fromInclusive, long toInclusive) throws IOException {
if (PointValues.size(reader, name()) == 0) {
// no points, so nothing matches
return Relation.DISJOINT;
@@ -421,10 +421,10 @@ public enum Relation {
* {@link Relation#INTERSECTS}, which is always fine to return when there is
* no way to check whether values are actually within bounds. */
public Relation isFieldWithinQuery(
IndexReader reader,
Object from, Object to,
boolean includeLower, boolean includeUpper,
ZoneId timeZone, DateMathParser dateMathParser, QueryRewriteContext context) throws IOException {
IndexReader reader,
Object from, Object to,
boolean includeLower, boolean includeUpper,
ZoneId timeZone, DateMathParser dateMathParser, QueryRewriteContext context) throws IOException {
return Relation.INTERSECTS;
}

