Skip to content

Commit

Permalink
Begin moving date_histogram to offset rounding (take two) (#51271)
Browse files Browse the repository at this point in the history
We added a new rounding in #50609 that handles offsets to the start and
end of the rounding so that we could support `offset` in the `composite`
aggregation. This starts moving `date_histogram` to that new offset.

This is a redo of #50873 with more integration tests.

This reverts commit d114c9d.
  • Loading branch information
nik9000 authored Jan 27, 2020
1 parent d1de74e commit 0483f7c
Show file tree
Hide file tree
Showing 10 changed files with 287 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,213 @@ setup:

- match: { aggregations.histo.buckets.3.doc_count: 1 }

---
"date_histogram":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0

- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards makes sure we get one empty.
number_of_shards: 5
mappings:
properties:
date:
type : date

- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"date": "2016-01-01"}'
- '{"index": {}}'
- '{"date": "2016-01-02"}'
- '{"index": {}}'
- '{"date": "2016-02-01"}'
- '{"index": {}}'
- '{"date": "2016-03-01"}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date
calendar_interval: month

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 2 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 1 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }

---
"date_histogram with offset":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0

- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards makes sure we get one empty.
number_of_shards: 5
mappings:
properties:
date:
type : date

- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"date": "2016-01-01"}'
- '{"index": {}}'
- '{"date": "2016-01-02"}'
- '{"index": {}}'
- '{"date": "2016-02-01"}'
- '{"index": {}}'
- '{"date": "2016-03-01"}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date
calendar_interval: month
offset: +1d

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 1 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-01-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 2 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }


---
"date_histogram on range":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0

- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards makes sure we get one empty.
number_of_shards: 5
mappings:
properties:
range:
type : date_range

- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: range
calendar_interval: month

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 2 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 1 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }

---
"date_histogram on range with offset":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0

- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards makes sure we get one empty.
number_of_shards: 5
mappings:
properties:
range:
type : date_range

- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: range
calendar_interval: month
offset: +1d

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 1 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-01-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 2 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }
46 changes: 44 additions & 2 deletions server/src/main/java/org/elasticsearch/common/Rounding.java
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,19 @@ public void writeTo(StreamOutput out) throws IOException {
*/
public abstract long nextRoundingValue(long value);

/**
* How "offset" this rounding is from the traditional "start" of the period.
* @deprecated We're in the process of abstracting offset *into* Rounding
* so keep any usage to migratory shims
*/
@Deprecated
public abstract long offset();

/**
* Strip the {@code offset} from these bounds.
*/
public abstract Rounding withoutOffset();

@Override
public abstract boolean equals(Object obj);

Expand Down Expand Up @@ -420,6 +433,16 @@ public long nextRoundingValue(long utcMillis) {
}
}

@Override
public long offset() {
return 0;
}

@Override
public Rounding withoutOffset() {
return this;
}

@Override
public int hashCode() {
return Objects.hash(unit, timeZone);
Expand Down Expand Up @@ -546,6 +569,16 @@ public long nextRoundingValue(long time) {
.toInstant().toEpochMilli();
}

@Override
public long offset() {
return 0;
}

@Override
public Rounding withoutOffset() {
return this;
}

@Override
public int hashCode() {
return Objects.hash(interval, timeZone);
Expand Down Expand Up @@ -607,8 +640,17 @@ public long round(long value) {

@Override
public long nextRoundingValue(long value) {
// This isn't needed by the current users. We'll implement it when we migrate other users to it.
throw new UnsupportedOperationException("not yet supported");
return delegate.nextRoundingValue(value - offset) + offset;
}

@Override
public long offset() {
return offset;
}

@Override
public Rounding withoutOffset() {
return delegate;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -493,22 +493,21 @@ protected ValuesSourceAggregatorFactory<ValuesSource> innerBuild(QueryShardConte
AggregatorFactory parent,
Builder subFactoriesBuilder) throws IOException {
final ZoneId tz = timeZone();
// TODO use offset here rather than explicitly in the aggregation
final Rounding rounding = dateHistogramInterval.createRounding(tz, 0);
final Rounding rounding = dateHistogramInterval.createRounding(tz, offset);
final ZoneId rewrittenTimeZone = rewriteTimeZone(queryShardContext);
final Rounding shardRounding;
if (tz == rewrittenTimeZone) {
shardRounding = rounding;
} else {
shardRounding = dateHistogramInterval.createRounding(rewrittenTimeZone, 0);
shardRounding = dateHistogramInterval.createRounding(rewrittenTimeZone, offset);
}

ExtendedBounds roundedBounds = null;
if (this.extendedBounds != null) {
// parse any string bounds to longs and round
roundedBounds = this.extendedBounds.parseAndValidate(name, queryShardContext, config.format()).round(rounding);
}
return new DateHistogramAggregatorFactory(name, config, offset, order, keyed, minDocCount,
return new DateHistogramAggregatorFactory(name, config, order, keyed, minDocCount,
rounding, shardRounding, roundedBounds, queryShardContext, parent, subFactoriesBuilder, metaData);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,16 @@ class DateHistogramAggregator extends BucketsAggregator {
private final ExtendedBounds extendedBounds;

private final LongHash bucketOrds;
private long offset;

DateHistogramAggregator(String name, AggregatorFactories factories, Rounding rounding, Rounding shardRounding,
long offset, BucketOrder order, boolean keyed,
BucketOrder order, boolean keyed,
long minDocCount, @Nullable ExtendedBounds extendedBounds, @Nullable ValuesSource.Numeric valuesSource,
DocValueFormat formatter, SearchContext aggregationContext,
Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {

super(name, factories, aggregationContext, parent, pipelineAggregators, metaData);
this.rounding = rounding;
this.shardRounding = shardRounding;
this.offset = offset;
this.order = InternalOrder.validate(order, this);
this.keyed = keyed;
this.minDocCount = minDocCount;
Expand Down Expand Up @@ -113,7 +111,7 @@ public void collect(int doc, long bucket) throws IOException {
long value = values.nextValue();
// We can use shardRounding here, which is sometimes more efficient
// if daylight saving times are involved.
long rounded = shardRounding.round(value - offset) + offset;
long rounded = shardRounding.round(value);
assert rounded >= previousRounded;
if (rounded == previousRounded) {
continue;
Expand Down Expand Up @@ -148,9 +146,9 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOE
// value source will be null for unmapped fields
// Important: use `rounding` here, not `shardRounding`
InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
? new InternalDateHistogram.EmptyBucketInfo(rounding, buildEmptySubAggregations(), extendedBounds)
? new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds)
: null;
return new InternalDateHistogram(name, buckets, order, minDocCount, offset, emptyBucketInfo, formatter, keyed,
return new InternalDateHistogram(name, buckets, order, minDocCount, rounding.offset(), emptyBucketInfo, formatter, keyed,
pipelineAggregators(), metaData());
}

Expand All @@ -159,8 +157,8 @@ public InternalAggregation buildEmptyAggregation() {
InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
? new InternalDateHistogram.EmptyBucketInfo(rounding, buildEmptySubAggregations(), extendedBounds)
: null;
return new InternalDateHistogram(name, Collections.emptyList(), order, minDocCount, offset, emptyBucketInfo, formatter, keyed,
pipelineAggregators(), metaData());
return new InternalDateHistogram(name, Collections.emptyList(), order, minDocCount, rounding.offset(), emptyBucketInfo, formatter,
keyed, pipelineAggregators(), metaData());
}

@Override
Expand Down
Loading

0 comments on commit 0483f7c

Please sign in to comment.