Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ES|QL] Allow DateTime as the third and fourth inputs to auto_bucket #104547

Merged
merged 8 commits into from
Jan 25, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,17 @@ from employees | where birth_date > now() | sort emp_no asc | keep emp_no, birth
emp_no:integer | birth_date:date
;

autoBucketYearInAgg
FROM employees
| WHERE hire_date >= "1999-01-01T00:00:00Z"
| EVAL bucket = AUTO_BUCKET(hire_date, 5, "1999-01-01T00:00:00Z", NOW())
| STATS COUNT(*) by bucket
| sort bucket;

COUNT(*):long | bucket:date
1 | 1999-01-01T00:00:00.000Z
;


autoBucketMonthInAgg
// tag::auto_bucket_in_agg[]
Expand Down Expand Up @@ -910,7 +921,7 @@ docsAutoBucketLast24hr
//tag::docsAutoBucketLast24hr[]
FROM sample_data
| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()
| EVAL bucket = AUTO_BUCKET(@timestamp, 25, DATE_FORMAT(NOW() - 1 day), DATE_FORMAT(NOW()))
| EVAL bucket = AUTO_BUCKET(@timestamp, 25, NOW() - 1 day, NOW())
| STATS COUNT(*) BY bucket
//end::docsAutoBucketLast24hr[]
;
Expand All @@ -922,7 +933,7 @@ docsGettingStartedAutoBucket
// tag::gs-auto_bucket[]
FROM sample_data
| KEEP @timestamp
| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
| EVAL bucket = AUTO_BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", NOW())
// end::gs-auto_bucket[]
| LIMIT 0
;
Expand All @@ -934,7 +945,7 @@ docsGettingStartedAutoBucketStatsBy
// tag::gs-auto_bucket-stats-by[]
FROM sample_data
| KEEP @timestamp, event_duration
| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
| EVAL bucket = AUTO_BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
| STATS COUNT(*) BY bucket
// end::gs-auto_bucket-stats-by[]
| SORT bucket
Expand All @@ -949,7 +960,7 @@ docsGettingStartedAutoBucketStatsByMedian
// tag::gs-auto_bucket-stats-by-median[]
FROM sample_data
| KEEP @timestamp, event_duration
| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
| EVAL bucket = AUTO_BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
| STATS median_duration = MEDIAN(event_duration) BY bucket
// end::gs-auto_bucket-stats-by-median[]
| SORT bucket
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ acos |"double acos(n:double|integer|long|unsigned_long)"
asin |"double asin(n:double|integer|long|unsigned_long)"|n |"double|integer|long|unsigned_long" | "" |double | "Inverse sine trigonometric function." | false | false | false
atan |"double atan(n:double|integer|long|unsigned_long)" |n |"double|integer|long|unsigned_long" | "" |double | "Inverse tangent trigonometric function." | false | false | false
atan2 |"double atan2(y:double|integer|long|unsigned_long, x:double|integer|long|unsigned_long)" |[y, x] |["double|integer|long|unsigned_long", "double|integer|long|unsigned_long"] |["", ""] |double | "The angle between the positive x-axis and the ray from the origin to the point (x , y) in the Cartesian plane." | [false, false] | false | false
auto_bucket |"double|date auto_bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date, to:integer|long|double|date)" |[field, buckets, from, to] |["integer|long|double|date", "integer", "integer|long|double|date", "integer|long|double|date"] |["", "", "", ""] | "double|date" | "Creates human-friendly buckets and returns a datetime value for each row that corresponds to the resulting bucket the row falls into." | [false, false, false, false] | false | false
auto_bucket |"double|date auto_bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date|string, to:integer|long|double|date|string)" |[field, buckets, from, to] |["integer|long|double|date", "integer", "integer|long|double|date|string", "integer|long|double|date|string"] |["", "", "", ""] | "double|date" | "Creates human-friendly buckets and returns a datetime value for each row that corresponds to the resulting bucket the row falls into." | [false, false, false, false] | false | false
avg |"double avg(field:double|integer|long|unsigned_long)" |field |"double|integer|long|unsigned_long" | "" |double | "The average of a numeric field." | false | false | true
case |"boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version case(condition:boolean, rest...:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version)" |[condition, rest] |["boolean", "boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version"] |["", ""] |"boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version" | "Accepts pairs of conditions and values. The function returns the value that belongs to the first condition that evaluates to true." | [false, false] | true | false
ceil |"double|integer|long|unsigned_long ceil(n:double|integer|long|unsigned_long)" |n |"double|integer|long|unsigned_long" | "" | "double|integer|long|unsigned_long" | "Round a number up to the nearest integer." | false | false | false
Expand Down Expand Up @@ -111,7 +111,7 @@ synopsis:keyword
"double asin(n:double|integer|long|unsigned_long)"
"double atan(n:double|integer|long|unsigned_long)"
"double atan2(y:double|integer|long|unsigned_long, x:double|integer|long|unsigned_long)"
"double|date auto_bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date, to:integer|long|double|date)"
"double|date auto_bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date|string, to:integer|long|double|date|string)"
"double avg(field:double|integer|long|unsigned_long)"
"boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version case(condition:boolean, rest...:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version)"
"double|integer|long|unsigned_long ceil(n:double|integer|long|unsigned_long)"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

package org.elasticsearch.xpack.esql.expression.function.scalar.math;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Rounding;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.mapper.DateFieldMapper;
Expand All @@ -20,6 +20,7 @@
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Div;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul;
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.expression.Foldables;
import org.elasticsearch.xpack.ql.expression.Literal;
import org.elasticsearch.xpack.ql.expression.TypeResolutions;
import org.elasticsearch.xpack.ql.expression.function.scalar.ScalarFunction;
Expand All @@ -40,7 +41,6 @@
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isFoldable;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isInteger;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isString;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isType;

/**
Expand Down Expand Up @@ -90,8 +90,8 @@ public AutoBucket(
Source source,
@Param(name = "field", type = { "integer", "long", "double", "date" }) Expression field,
@Param(name = "buckets", type = { "integer" }) Expression buckets,
@Param(name = "from", type = { "integer", "long", "double", "date" }) Expression from,
@Param(name = "to", type = { "integer", "long", "double", "date" }) Expression to
@Param(name = "from", type = { "integer", "long", "double", "date", "string" }) Expression from,
@Param(name = "to", type = { "integer", "long", "double", "date", "string" }) Expression to
) {
super(source, List.of(field, buckets, from, to));
this.field = field;
Expand All @@ -115,8 +115,8 @@ public ExpressionEvaluator.Factory toEvaluator(Function<Expression, ExpressionEv
int b = ((Number) buckets.fold()).intValue();

if (field.dataType() == DataTypes.DATETIME) {
long f = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(((BytesRef) from.fold()).utf8ToString());
long t = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(((BytesRef) to.fold()).utf8ToString());
long f = foldToLong(from);
long t = foldToLong(to);
return DateTrunc.evaluator(
source(),
toEvaluator.apply(field),
Expand Down Expand Up @@ -182,7 +182,7 @@ protected TypeResolution resolveType() {
}

if (field.dataType() == DataTypes.DATETIME) {
return resolveType((e, o) -> isString(e, sourceText(), o));
return resolveType((e, o) -> isStringOrDate(e, sourceText(), o));
}
if (field.dataType().isNumeric()) {
return resolveType((e, o) -> isNumeric(e, sourceText(), o));
Expand Down Expand Up @@ -216,6 +216,24 @@ private TypeResolution resolveType(BiFunction<Expression, TypeResolutions.ParamO
return isFoldable(to, sourceText(), FOURTH);
}

public static TypeResolution isStringOrDate(Expression e, String operationName, TypeResolutions.ParamOrdinal paramOrd) {
fang-xing-esql marked this conversation as resolved.
Show resolved Hide resolved
return TypeResolutions.isType(
e,
exp -> DataTypes.isString(exp) || DataTypes.isDateTime(exp),
operationName,
paramOrd,
"datetime",
"string"
);
}

/**
 * Folds {@code e} via {@link Foldables#valueOf} and converts the result to a {@code long}.
 * <p>
 * When the expression's data type is a datetime, the folded value is treated as a
 * {@link Number} and its {@code longValue()} is returned. Otherwise the folded value is
 * rendered with {@code BytesRefs.toString} and parsed by
 * {@code DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis}.
 *
 * @param e the expression to fold
 * @return the folded value as a long (presumably epoch milliseconds -- confirm against callers)
 */
private long foldToLong(Expression e) {
    Object folded = Foldables.valueOf(e);
    if (DataTypes.isDateTime(e.dataType())) {
        // datetime-typed values are already numeric; take them as-is
        return ((Number) folded).longValue();
    }
    // anything else is converted to text and parsed with the default date-time formatter
    return DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(BytesRefs.toString(folded));
}

@Override
public DataType dataType() {
if (field.dataType().isNumeric()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,22 @@ private Expression build(Source source, Expression arg) {
Literal from;
Literal to;
if (arg.dataType() == DataTypes.DATETIME) {
from = new Literal(Source.EMPTY, new BytesRef("2023-02-01T00:00:00.00Z"), DataTypes.KEYWORD);
to = new Literal(Source.EMPTY, new BytesRef("2023-03-01T00:00:00.00Z"), DataTypes.KEYWORD);
from = stringOrDateTime("2023-02-01T00:00:00.00Z");
to = stringOrDateTime("2023-03-01T09:00:00.00Z");
} else {
from = new Literal(Source.EMPTY, 0, DataTypes.DOUBLE);
to = new Literal(Source.EMPTY, 1000, DataTypes.DOUBLE);
}
return new AutoBucket(source, arg, new Literal(Source.EMPTY, 50, DataTypes.INTEGER), from, to);
}

private Literal stringOrDateTime(String date) {
if (randomBoolean()) {
fang-xing-esql marked this conversation as resolved.
Show resolved Hide resolved
return new Literal(Source.EMPTY, new BytesRef(date), randomBoolean() ? DataTypes.KEYWORD : DataTypes.TEXT);
}
return new Literal(Source.EMPTY, DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(date), DataTypes.DATETIME);
}

@Override
protected DataType expectedType(List<DataType> argTypes) {
if (argTypes.get(0).isNumeric()) {
Expand Down