Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Begin moving date_histogram to offset rounding (take two) #51271

Merged
merged 6 commits into from
Jan 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,213 @@ setup:

- match: { aggregations.histo.buckets.3.doc_count: 1 }

---
"date_histogram":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0

- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards makes sure we get one empty.
number_of_shards: 5
mappings:
properties:
date:
type : date

- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"date": "2016-01-01"}'
- '{"index": {}}'
- '{"date": "2016-01-02"}'
- '{"index": {}}'
- '{"date": "2016-02-01"}'
- '{"index": {}}'
- '{"date": "2016-03-01"}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date
calendar_interval: month

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 2 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 1 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }

---
"date_histogram with offset":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0

- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards makes sure we get one empty.
number_of_shards: 5
mappings:
properties:
date:
type : date

- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"date": "2016-01-01"}'
- '{"index": {}}'
- '{"date": "2016-01-02"}'
- '{"index": {}}'
- '{"date": "2016-02-01"}'
- '{"index": {}}'
- '{"date": "2016-03-01"}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date
calendar_interval: month
offset: +1d

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 1 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-01-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 2 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }


---
"date_histogram on range":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0

- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards makes sure we get one empty.
number_of_shards: 5
mappings:
properties:
range:
type : date_range

- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: range
calendar_interval: month

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 2 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 1 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }

---
"date_histogram on range with offset":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0

- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards makes sure we get one empty.
number_of_shards: 5
mappings:
properties:
range:
type : date_range

- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'

- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: range
calendar_interval: month
offset: +1d

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 1 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-01-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 2 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }
46 changes: 44 additions & 2 deletions server/src/main/java/org/elasticsearch/common/Rounding.java
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,19 @@ public void writeTo(StreamOutput out) throws IOException {
*/
public abstract long nextRoundingValue(long value);

/**
* How "offset" this rounding is from the traditional "start" of the period.
* @deprecated We're in the process of abstracting offset *into* Rounding
* so keep any usage to migratory shims
*/
@Deprecated
public abstract long offset();

/**
* Strip the {@code offset} from these bounds.
*/
public abstract Rounding withoutOffset();

@Override
public abstract boolean equals(Object obj);

Expand Down Expand Up @@ -420,6 +433,16 @@ public long nextRoundingValue(long utcMillis) {
}
}

@Override
public long offset() {
return 0;
}

@Override
public Rounding withoutOffset() {
return this;
}

@Override
public int hashCode() {
return Objects.hash(unit, timeZone);
Expand Down Expand Up @@ -546,6 +569,16 @@ public long nextRoundingValue(long time) {
.toInstant().toEpochMilli();
}

@Override
public long offset() {
return 0;
}

@Override
public Rounding withoutOffset() {
return this;
}

@Override
public int hashCode() {
return Objects.hash(interval, timeZone);
Expand Down Expand Up @@ -607,8 +640,17 @@ public long round(long value) {

@Override
public long nextRoundingValue(long value) {
// This isn't needed by the current users. We'll implement it when we migrate other users to it.
throw new UnsupportedOperationException("not yet supported");
return delegate.nextRoundingValue(value - offset) + offset;
}

@Override
public long offset() {
return offset;
}

@Override
public Rounding withoutOffset() {
return delegate;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -493,22 +493,21 @@ protected ValuesSourceAggregatorFactory<ValuesSource> innerBuild(QueryShardConte
AggregatorFactory parent,
Builder subFactoriesBuilder) throws IOException {
final ZoneId tz = timeZone();
// TODO use offset here rather than explicitly in the aggregation
final Rounding rounding = dateHistogramInterval.createRounding(tz, 0);
final Rounding rounding = dateHistogramInterval.createRounding(tz, offset);
final ZoneId rewrittenTimeZone = rewriteTimeZone(queryShardContext);
final Rounding shardRounding;
if (tz == rewrittenTimeZone) {
shardRounding = rounding;
} else {
shardRounding = dateHistogramInterval.createRounding(rewrittenTimeZone, 0);
shardRounding = dateHistogramInterval.createRounding(rewrittenTimeZone, offset);
}

ExtendedBounds roundedBounds = null;
if (this.extendedBounds != null) {
// parse any string bounds to longs and round
roundedBounds = this.extendedBounds.parseAndValidate(name, queryShardContext, config.format()).round(rounding);
}
return new DateHistogramAggregatorFactory(name, config, offset, order, keyed, minDocCount,
return new DateHistogramAggregatorFactory(name, config, order, keyed, minDocCount,
rounding, shardRounding, roundedBounds, queryShardContext, parent, subFactoriesBuilder, metaData);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,16 @@ class DateHistogramAggregator extends BucketsAggregator {
private final ExtendedBounds extendedBounds;

private final LongHash bucketOrds;
private long offset;

DateHistogramAggregator(String name, AggregatorFactories factories, Rounding rounding, Rounding shardRounding,
long offset, BucketOrder order, boolean keyed,
BucketOrder order, boolean keyed,
long minDocCount, @Nullable ExtendedBounds extendedBounds, @Nullable ValuesSource.Numeric valuesSource,
DocValueFormat formatter, SearchContext aggregationContext,
Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {

super(name, factories, aggregationContext, parent, pipelineAggregators, metaData);
this.rounding = rounding;
this.shardRounding = shardRounding;
this.offset = offset;
this.order = InternalOrder.validate(order, this);
this.keyed = keyed;
this.minDocCount = minDocCount;
Expand Down Expand Up @@ -113,7 +111,7 @@ public void collect(int doc, long bucket) throws IOException {
long value = values.nextValue();
// We can use shardRounding here, which is sometimes more efficient
// if daylight saving times are involved.
long rounded = shardRounding.round(value - offset) + offset;
long rounded = shardRounding.round(value);
assert rounded >= previousRounded;
if (rounded == previousRounded) {
continue;
Expand Down Expand Up @@ -148,9 +146,9 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOE
// value source will be null for unmapped fields
// Important: use `rounding` here, not `shardRounding`
InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
? new InternalDateHistogram.EmptyBucketInfo(rounding, buildEmptySubAggregations(), extendedBounds)
? new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds)
: null;
return new InternalDateHistogram(name, buckets, order, minDocCount, offset, emptyBucketInfo, formatter, keyed,
return new InternalDateHistogram(name, buckets, order, minDocCount, rounding.offset(), emptyBucketInfo, formatter, keyed,
pipelineAggregators(), metaData());
}

Expand All @@ -159,8 +157,8 @@ public InternalAggregation buildEmptyAggregation() {
InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
? new InternalDateHistogram.EmptyBucketInfo(rounding, buildEmptySubAggregations(), extendedBounds)
: null;
return new InternalDateHistogram(name, Collections.emptyList(), order, minDocCount, offset, emptyBucketInfo, formatter, keyed,
pipelineAggregators(), metaData());
return new InternalDateHistogram(name, Collections.emptyList(), order, minDocCount, rounding.offset(), emptyBucketInfo, formatter,
keyed, pipelineAggregators(), metaData());
}

@Override
Expand Down
Loading