Commit

[ES|QL] Allow DateTime as the third and fourth inputs to auto_bucket (elastic#104547)

* Allow both string and datetime as the third and fourth inputs to auto_bucket

 Committer: Fang Xing <[email protected]>

fang-xing-esql authored Jan 25, 2024
1 parent 03c9f89 commit e0514f3
Showing 4 changed files with 51 additions and 15 deletions.
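
For context, here is a minimal sketch of the kind of query this change enables, adapted from the docsAutoBucketLast24hr test updated below (the sample_data index and @timestamp field come from that test): AUTO_BUCKET now takes datetime expressions such as NOW() directly as its from and to arguments, with no DATE_FORMAT wrapper.

FROM sample_data
| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()
| EVAL bucket = AUTO_BUCKET(@timestamp, 25, NOW() - 1 day, NOW())
| STATS COUNT(*) BY bucket
| SORT bucket
;
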
@@ -315,6 +315,17 @@ from employees | where birth_date > now() | sort emp_no asc | keep emp_no, birth
emp_no:integer | birth_date:date
;

autoBucketYearInAgg
FROM employees
| WHERE hire_date >= "1999-01-01T00:00:00Z"
| EVAL bucket = AUTO_BUCKET(hire_date, 5, "1999-01-01T00:00:00Z", NOW())
| STATS COUNT(*) by bucket
| sort bucket;

COUNT(*):long | bucket:date
1 | 1999-01-01T00:00:00.000Z
;


autoBucketMonthInAgg
// tag::auto_bucket_in_agg[]
@@ -910,7 +921,7 @@ docsAutoBucketLast24hr
//tag::docsAutoBucketLast24hr[]
FROM sample_data
| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()
| EVAL bucket = AUTO_BUCKET(@timestamp, 25, DATE_FORMAT(NOW() - 1 day), DATE_FORMAT(NOW()))
| EVAL bucket = AUTO_BUCKET(@timestamp, 25, NOW() - 1 day, NOW())
| STATS COUNT(*) BY bucket
//end::docsAutoBucketLast24hr[]
;
@@ -922,7 +933,7 @@ docsGettingStartedAutoBucket
// tag::gs-auto_bucket[]
FROM sample_data
| KEEP @timestamp
| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
| EVAL bucket = AUTO_BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", NOW())
// end::gs-auto_bucket[]
| LIMIT 0
;
@@ -934,7 +945,7 @@ docsGettingStartedAutoBucketStatsBy
// tag::gs-auto_bucket-stats-by[]
FROM sample_data
| KEEP @timestamp, event_duration
| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
| EVAL bucket = AUTO_BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
| STATS COUNT(*) BY bucket
// end::gs-auto_bucket-stats-by[]
| SORT bucket
@@ -949,7 +960,7 @@ docsGettingStartedAutoBucketStatsByMedian
// tag::gs-auto_bucket-stats-by-median[]
FROM sample_data
| KEEP @timestamp, event_duration
| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
| EVAL bucket = AUTO_BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
| STATS median_duration = MEDIAN(event_duration) BY bucket
// end::gs-auto_bucket-stats-by-median[]
| SORT bucket
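
As the new autoBucketYearInAgg test above exercises, a string literal and a datetime expression can now be mixed as the from and to endpoints. A minimal sketch along the same lines (the employees index and hire_date field come from that test; the hires alias is illustrative):

FROM employees
| WHERE hire_date >= "1999-01-01T00:00:00Z"
| EVAL bucket = AUTO_BUCKET(hire_date, 5, "1999-01-01T00:00:00Z", NOW())
| STATS hires = COUNT(*) BY bucket
| SORT bucket
;
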
@@ -15,7 +15,7 @@ acos |"double acos(n:double|integer|long|unsigned_long)"
asin |"double asin(n:double|integer|long|unsigned_long)"|n |"double|integer|long|unsigned_long" | "" |double | "Inverse sine trigonometric function." | false | false | false
atan |"double atan(n:double|integer|long|unsigned_long)" |n |"double|integer|long|unsigned_long" | "" |double | "Inverse tangent trigonometric function." | false | false | false
atan2 |"double atan2(y:double|integer|long|unsigned_long, x:double|integer|long|unsigned_long)" |[y, x] |["double|integer|long|unsigned_long", "double|integer|long|unsigned_long"] |["", ""] |double | "The angle between the positive x-axis and the ray from the origin to the point (x , y) in the Cartesian plane." | [false, false] | false | false
auto_bucket |"double|date auto_bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date, to:integer|long|double|date)" |[field, buckets, from, to] |["integer|long|double|date", "integer", "integer|long|double|date", "integer|long|double|date"] |["", "", "", ""] | "double|date" | "Creates human-friendly buckets and returns a datetime value for each row that corresponds to the resulting bucket the row falls into." | [false, false, false, false] | false | false
auto_bucket |"double|date auto_bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date|string, to:integer|long|double|date|string)" |[field, buckets, from, to] |["integer|long|double|date", "integer", "integer|long|double|date|string", "integer|long|double|date|string"] |["", "", "", ""] | "double|date" | "Creates human-friendly buckets and returns a datetime value for each row that corresponds to the resulting bucket the row falls into." | [false, false, false, false] | false | false
avg |"double avg(field:double|integer|long|unsigned_long)" |field |"double|integer|long|unsigned_long" | "" |double | "The average of a numeric field." | false | false | true
case |"boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version case(condition:boolean, rest...:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version)" |[condition, rest] |["boolean", "boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version"] |["", ""] |"boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version" | "Accepts pairs of conditions and values. The function returns the value that belongs to the first condition that evaluates to true." | [false, false] | true | false
ceil |"double|integer|long|unsigned_long ceil(n:double|integer|long|unsigned_long)" |n |"double|integer|long|unsigned_long" | "" | "double|integer|long|unsigned_long" | "Round a number up to the nearest integer." | false | false | false
@@ -111,7 +111,7 @@ synopsis:keyword
"double asin(n:double|integer|long|unsigned_long)"
"double atan(n:double|integer|long|unsigned_long)"
"double atan2(y:double|integer|long|unsigned_long, x:double|integer|long|unsigned_long)"
"double|date auto_bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date, to:integer|long|double|date)"
"double|date auto_bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date|string, to:integer|long|double|date|string)"
"double avg(field:double|integer|long|unsigned_long)"
"boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version case(condition:boolean, rest...:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version)"
"double|integer|long|unsigned_long ceil(n:double|integer|long|unsigned_long)"
@@ -7,8 +7,8 @@

package org.elasticsearch.xpack.esql.expression.function.scalar.math;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Rounding;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.mapper.DateFieldMapper;
@@ -20,6 +20,7 @@
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Div;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul;
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.expression.Foldables;
import org.elasticsearch.xpack.ql.expression.Literal;
import org.elasticsearch.xpack.ql.expression.TypeResolutions;
import org.elasticsearch.xpack.ql.expression.function.scalar.ScalarFunction;
@@ -40,7 +41,6 @@
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isFoldable;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isInteger;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isString;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isType;

/**
@@ -90,8 +90,8 @@ public AutoBucket(
Source source,
@Param(name = "field", type = { "integer", "long", "double", "date" }) Expression field,
@Param(name = "buckets", type = { "integer" }) Expression buckets,
@Param(name = "from", type = { "integer", "long", "double", "date" }) Expression from,
@Param(name = "to", type = { "integer", "long", "double", "date" }) Expression to
@Param(name = "from", type = { "integer", "long", "double", "date", "string" }) Expression from,
@Param(name = "to", type = { "integer", "long", "double", "date", "string" }) Expression to
) {
super(source, List.of(field, buckets, from, to));
this.field = field;
@@ -115,8 +115,8 @@ public ExpressionEvaluator.Factory toEvaluator(Function<Expression, ExpressionEv
int b = ((Number) buckets.fold()).intValue();

if (field.dataType() == DataTypes.DATETIME) {
long f = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(((BytesRef) from.fold()).utf8ToString());
long t = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(((BytesRef) to.fold()).utf8ToString());
long f = foldToLong(from);
long t = foldToLong(to);
return DateTrunc.evaluator(
source(),
toEvaluator.apply(field),
@@ -182,7 +182,7 @@ protected TypeResolution resolveType() {
}

if (field.dataType() == DataTypes.DATETIME) {
return resolveType((e, o) -> isString(e, sourceText(), o));
return resolveType((e, o) -> isStringOrDate(e, sourceText(), o));
}
if (field.dataType().isNumeric()) {
return resolveType((e, o) -> isNumeric(e, sourceText(), o));
@@ -216,6 +216,24 @@ private TypeResolution resolveType(BiFunction<Expression, TypeResolutions.ParamO
return isFoldable(to, sourceText(), FOURTH);
}

public static TypeResolution isStringOrDate(Expression e, String operationName, TypeResolutions.ParamOrdinal paramOrd) {
return TypeResolutions.isType(
e,
exp -> DataTypes.isString(exp) || DataTypes.isDateTime(exp),
operationName,
paramOrd,
"datetime",
"string"
);
}

private long foldToLong(Expression e) {
Object value = Foldables.valueOf(e);
return DataTypes.isDateTime(e.dataType())
? ((Number) value).longValue()
: DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(BytesRefs.toString(value));
}

@Override
public DataType dataType() {
if (field.dataType().isNumeric()) {
@@ -84,15 +84,22 @@ private Expression build(Source source, Expression arg) {
Literal from;
Literal to;
if (arg.dataType() == DataTypes.DATETIME) {
from = new Literal(Source.EMPTY, new BytesRef("2023-02-01T00:00:00.00Z"), DataTypes.KEYWORD);
to = new Literal(Source.EMPTY, new BytesRef("2023-03-01T00:00:00.00Z"), DataTypes.KEYWORD);
from = stringOrDateTime("2023-02-01T00:00:00.00Z");
to = stringOrDateTime("2023-03-01T09:00:00.00Z");
} else {
from = new Literal(Source.EMPTY, 0, DataTypes.DOUBLE);
to = new Literal(Source.EMPTY, 1000, DataTypes.DOUBLE);
}
return new AutoBucket(source, arg, new Literal(Source.EMPTY, 50, DataTypes.INTEGER), from, to);
}

private Literal stringOrDateTime(String date) {
if (randomBoolean()) {
return new Literal(Source.EMPTY, new BytesRef(date), randomBoolean() ? DataTypes.KEYWORD : DataTypes.TEXT);
}
return new Literal(Source.EMPTY, DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(date), DataTypes.DATETIME);
}

@Override
protected DataType expectedType(List<DataType> argTypes) {
if (argTypes.get(0).isNumeric()) {
