From aba75664090a808e90148d19fc2def97560ef2d4 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 9 Apr 2024 12:22:15 -0400 Subject: [PATCH] ESQL: Better tests to AUTO_BUCKET (#107228) This improves the tests for AUTO_BUCKET marginally, specifically so that it tests all valid combinations of arguments and generates a correct types table. This'll combine nicely with #106782 to generate the signatures that kibana needs for it's editor. --- .../esql/functions/types/auto_bucket.asciidoc | 37 +++- .../src/main/resources/meta.csv-spec | 4 +- .../function/scalar/math/AutoBucket.java | 4 +- .../function/AbstractFunctionTestCase.java | 48 +++-- .../expression/function/TestCaseSupplier.java | 8 + .../function/scalar/math/AutoBucketTests.java | 179 ++++++++++-------- 6 files changed, 180 insertions(+), 100 deletions(-) diff --git a/docs/reference/esql/functions/types/auto_bucket.asciidoc b/docs/reference/esql/functions/types/auto_bucket.asciidoc index 535e2df29c353..cfe74ae25c3d0 100644 --- a/docs/reference/esql/functions/types/auto_bucket.asciidoc +++ b/docs/reference/esql/functions/types/auto_bucket.asciidoc @@ -5,5 +5,40 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== field | buckets | from | to | result - +datetime | integer | datetime | datetime | datetime +datetime | integer | datetime | keyword | datetime +datetime | integer | datetime | text | datetime +datetime | integer | keyword | datetime | datetime +datetime | integer | keyword | keyword | datetime +datetime | integer | keyword | text | datetime +datetime | integer | text | datetime | datetime +datetime | integer | text | keyword | datetime +datetime | integer | text | text | datetime +double | integer | double | double | double +double | integer | double | integer | double +double | integer | double | long | double +double | integer | integer | double | double +double | integer | integer | integer | double +double | integer | integer | long | double +double | integer | long | double | double +double | integer | long | integer | double +double | integer | long | long | double +integer | integer | double | double | double +integer | integer | double | integer | double +integer | integer | double | long | double +integer | integer | integer | double | double +integer | integer | integer | integer | double +integer | integer | integer | long | double +integer | integer | long | double | double +integer | integer | long | integer | double +integer | integer | long | long | double +long | integer | double | double | double +long | integer | double | integer | double +long | integer | double | long | double +long | integer | integer | double | double +long | integer | integer | integer | double +long | integer | integer | long | double +long | integer | long | double | double +long | integer | long | integer | double +long | integer | long | long | double |=== diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec index d344b50c0364f..492da4ee5ef36 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec @@ -7,7 +7,7 @@ synopsis:keyword "double asin(number:double|integer|long|unsigned_long)" "double atan(number:double|integer|long|unsigned_long)" "double atan2(y_coordinate:double|integer|long|unsigned_long, x_coordinate:double|integer|long|unsigned_long)" -"double|date auto_bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date|string, to:integer|long|double|date|string)" +"double|date auto_bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date|keyword|text, to:integer|long|double|date|keyword|text)" "double avg(number:double|integer|long)" "boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version case(condition:boolean, trueValue...:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version)" "double|integer|long|unsigned_long ceil(number:double|integer|long|unsigned_long)" @@ -117,7 +117,7 @@ acos |number |"double|integer|long|unsigne asin |number |"double|integer|long|unsigned_long" |Number between -1 and 1. If `null`, the function returns `null`. atan |number |"double|integer|long|unsigned_long" |Numeric expression. If `null`, the function returns `null`. atan2 |[y_coordinate, x_coordinate] |["double|integer|long|unsigned_long", "double|integer|long|unsigned_long"] |[y coordinate. If `null`\, the function returns `null`., x coordinate. If `null`\, the function returns `null`.] -auto_bucket |[field, buckets, from, to] |["integer|long|double|date", integer, "integer|long|double|date|string", "integer|long|double|date|string"] |["", "", "", ""] +auto_bucket |[field, buckets, from, to] |["integer|long|double|date", integer, "integer|long|double|date|keyword|text", "integer|long|double|date|keyword|text"] |["", "", "", ""] avg |number |"double|integer|long" |[""] case |[condition, trueValue] |[boolean, "boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version"] |["", ""] ceil |number |"double|integer|long|unsigned_long" |Numeric expression. If `null`, the function returns `null`. diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/math/AutoBucket.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/math/AutoBucket.java index b9aeff7f1d935..ea581437f6c4f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/math/AutoBucket.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/math/AutoBucket.java @@ -90,8 +90,8 @@ public AutoBucket( Source source, @Param(name = "field", type = { "integer", "long", "double", "date" }) Expression field, @Param(name = "buckets", type = { "integer" }) Expression buckets, - @Param(name = "from", type = { "integer", "long", "double", "date", "string" }) Expression from, - @Param(name = "to", type = { "integer", "long", "double", "date", "string" }) Expression to + @Param(name = "from", type = { "integer", "long", "double", "date", "keyword", "text" }) Expression from, + @Param(name = "to", type = { "integer", "long", "double", "date", "keyword", "text" }) Expression to ) { super(source, List.of(field, buckets, from, to)); this.field = field; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java index 12c141cc7c8a7..889dfbf4c9b17 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java @@ -214,7 +214,10 @@ public static ExpressionEvaluator.Factory evaluator(Expression e) { } Layout.Builder builder = new Layout.Builder(); buildLayout(builder, e); - assertTrue(e.resolved()); + Expression.TypeResolution resolution = e.typeResolved(); + if (resolution.unresolved()) { + throw new AssertionError("expected resolved " + resolution.message()); + } return EvalMapper.toEvaluator(e, builder.build()); } @@ -256,7 +259,10 @@ public final void testEvaluate() { } return; } - assertFalse("expected resolved", expression.typeResolved().unresolved()); + Expression.TypeResolution resolution = expression.typeResolved(); + if (resolution.unresolved()) { + throw new AssertionError("expected resolved " + resolution.message()); + } expression = new FoldNull().rule(expression); assertThat(expression.dataType(), equalTo(testCase.expectedType())); logger.info("Result type: " + expression.dataType()); @@ -596,6 +602,28 @@ private static String signatureType(DataType type) { * on input types like {@link Greatest} or {@link Coalesce}. */ protected static List anyNullIsNull(boolean entirelyNullPreservesType, List testCaseSuppliers) { + return anyNullIsNull( + testCaseSuppliers, + (nullPosition, nullValueDataType, original) -> entirelyNullPreservesType == false + && nullValueDataType == DataTypes.NULL + && original.getData().size() == 1 ? DataTypes.NULL : original.expectedType(), + (nullPosition, original) -> original + ); + } + + public interface ExpectedType { + DataType expectedType(int nullPosition, DataType nullValueDataType, TestCaseSupplier.TestCase original); + } + + public interface ExpectedEvaluatorToString { + Matcher evaluatorToString(int nullPosition, Matcher original); + } + + protected static List anyNullIsNull( + List testCaseSuppliers, + ExpectedType expectedType, + ExpectedEvaluatorToString evaluatorToString + ) { typesRequired(testCaseSuppliers); List suppliers = new ArrayList<>(testCaseSuppliers.size()); suppliers.addAll(testCaseSuppliers); @@ -618,15 +646,12 @@ protected static List anyNullIsNull(boolean entirelyNullPreser TestCaseSupplier.TestCase oc = original.get(); List data = IntStream.range(0, oc.getData().size()).mapToObj(i -> { TestCaseSupplier.TypedData od = oc.getData().get(i); - if (i == finalNullPosition) { - return new TestCaseSupplier.TypedData(null, od.type(), od.name()); - } - return od; + return i == finalNullPosition ? od.forceValueToNull() : od; }).toList(); return new TestCaseSupplier.TestCase( data, - oc.evaluatorToString(), - oc.expectedType(), + evaluatorToString.evaluatorToString(finalNullPosition, oc.evaluatorToString()), + expectedType.expectedType(finalNullPosition, oc.getData().get(finalNullPosition).type(), oc), nullValue(), null, oc.getExpectedTypeError(), @@ -649,7 +674,7 @@ protected static List anyNullIsNull(boolean entirelyNullPreser return new TestCaseSupplier.TestCase( data, equalTo("LiteralsEvaluator[lit=null]"), - entirelyNullPreservesType == false && oc.getData().size() == 1 ? DataTypes.NULL : oc.expectedType(), + expectedType.expectedType(finalNullPosition, DataTypes.NULL, oc), nullValue(), null, oc.getExpectedTypeError(), @@ -755,9 +780,8 @@ private static Stream> allPermutations(int argumentCount) { if (argumentCount == 0) { return Stream.of(List.of()); } - if (argumentCount > 4) { - // TODO check for a limit 4. is arbitrary. - throw new IllegalArgumentException("would generate too many types"); + if (argumentCount > 3) { + throw new IllegalArgumentException("would generate too many combinations"); } Stream> stream = representable().map(t -> List.of(t)); for (int i = 1; i < argumentCount; i++) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java index d600e51c07925..c064cfebd9cc5 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java @@ -1325,6 +1325,14 @@ public TypedData forceLiteral() { return new TypedData(data, type, name, true); } + /** + * Return a {@link TypedData} that always returns {@code null} for it's + * value without modifying anything else in the supplier. + */ + public TypedData forceValueToNull() { + return new TypedData(null, type, name, forceLiteral); + } + @Override public String toString() { if (type == DataTypes.UNSIGNED_LONG && data instanceof Long longData) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/math/AutoBucketTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/math/AutoBucketTests.java index 013753c801c39..9d8cf702a375a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/math/AutoBucketTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/math/AutoBucketTests.java @@ -13,126 +13,139 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Rounding; import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase; import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; -import org.elasticsearch.xpack.esql.expression.function.scalar.AbstractScalarFunctionTestCase; import org.elasticsearch.xpack.ql.expression.Expression; -import org.elasticsearch.xpack.ql.expression.Literal; import org.elasticsearch.xpack.ql.tree.Source; import org.elasticsearch.xpack.ql.type.DataType; import org.elasticsearch.xpack.ql.type.DataTypes; import org.hamcrest.Matcher; +import java.util.ArrayList; import java.util.List; +import java.util.function.LongSupplier; import java.util.function.Supplier; import static org.hamcrest.Matchers.equalTo; -public class AutoBucketTests extends AbstractScalarFunctionTestCase { +public class AutoBucketTests extends AbstractFunctionTestCase { public AutoBucketTests(@Name("TestCase") Supplier testCaseSupplier) { this.testCase = testCaseSupplier.get(); } @ParametersFactory public static Iterable parameters() { - return parameterSuppliersFromTypedData(List.of(new TestCaseSupplier("Autobucket Single date", () -> { - List args = List.of( - new TestCaseSupplier.TypedData( - DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-02-17T09:00:00.00Z"), - DataTypes.DATETIME, - "arg" - ) - ); - return new TestCaseSupplier.TestCase( - args, - "DateTruncEvaluator[fieldVal=Attribute[channel=0], rounding=Rounding[DAY_OF_MONTH in Z][fixed to midnight]]", - DataTypes.DATETIME, - dateResultsMatcher(args) - ); - }), new TestCaseSupplier("Autobucket Single long", () -> { - List args = List.of(new TestCaseSupplier.TypedData(100L, DataTypes.LONG, "arg")); - return new TestCaseSupplier.TestCase( - args, - "MulDoublesEvaluator[lhs=FloorDoubleEvaluator[" - + "val=DivDoublesEvaluator[lhs=CastLongToDoubleEvaluator[v=Attribute[channel=0]], " - + "rhs=LiteralsEvaluator[lit=50.0]]], rhs=LiteralsEvaluator[lit=50.0]]", - DataTypes.DOUBLE, - numericResultsMatcher(args, 100.0) - ); - }), new TestCaseSupplier("Autobucket Single int", () -> { - List args = List.of(new TestCaseSupplier.TypedData(100, DataTypes.INTEGER, "arg")); - return new TestCaseSupplier.TestCase( - args, - "MulDoublesEvaluator[lhs=FloorDoubleEvaluator[" - + "val=DivDoublesEvaluator[lhs=CastIntToDoubleEvaluator[v=Attribute[channel=0]], " - + "rhs=LiteralsEvaluator[lit=50.0]]], rhs=LiteralsEvaluator[lit=50.0]]", - DataTypes.DOUBLE, - numericResultsMatcher(args, 100.0) - ); - }), new TestCaseSupplier("Autobucket Single double", () -> { - List args = List.of(new TestCaseSupplier.TypedData(100.0, DataTypes.DOUBLE, "arg")); - return new TestCaseSupplier.TestCase( - args, - "MulDoublesEvaluator[lhs=FloorDoubleEvaluator[val=DivDoublesEvaluator[lhs=Attribute[channel=0], " - + "rhs=LiteralsEvaluator[lit=50.0]]], rhs=LiteralsEvaluator[lit=50.0]]", - DataTypes.DOUBLE, - numericResultsMatcher(args, 100.0) - ); - }))); + List suppliers = new ArrayList<>(); + dateCases(suppliers, "fixed date", () -> DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-02-17T09:00:00.00Z")); + numberCases(suppliers, "fixed long", DataTypes.LONG, () -> 100L); + numberCases(suppliers, "fixed int", DataTypes.INTEGER, () -> 100); + numberCases(suppliers, "fixed double", DataTypes.DOUBLE, () -> 100.0); + // TODO make errorsForCasesWithoutExamples do something sensible for 4+ parameters + return parameterSuppliersFromTypedData( + anyNullIsNull( + suppliers, + (nullPosition, nullValueDataType, original) -> nullPosition == 0 && nullValueDataType == DataTypes.NULL + ? DataTypes.NULL + : original.expectedType(), + (nullPosition, original) -> nullPosition == 0 ? original : equalTo("LiteralsEvaluator[lit=null]") + ) + ); } - private Expression build(Source source, Expression arg) { - Literal from; - Literal to; - if (arg.dataType() == DataTypes.DATETIME) { - from = stringOrDateTime("2023-02-01T00:00:00.00Z"); - to = stringOrDateTime("2023-03-01T09:00:00.00Z"); - } else { - from = new Literal(Source.EMPTY, 0, DataTypes.DOUBLE); - to = new Literal(Source.EMPTY, 1000, DataTypes.DOUBLE); - } - return new AutoBucket(source, arg, new Literal(Source.EMPTY, 50, DataTypes.INTEGER), from, to); - } + // TODO once we cast above the functions we can drop these + private static final DataType[] DATE_BOUNDS_TYPE = new DataType[] { DataTypes.DATETIME, DataTypes.KEYWORD, DataTypes.TEXT }; - private Literal stringOrDateTime(String date) { - if (randomBoolean()) { - return new Literal(Source.EMPTY, new BytesRef(date), randomBoolean() ? DataTypes.KEYWORD : DataTypes.TEXT); + private static void dateCases(List suppliers, String name, LongSupplier date) { + for (DataType fromType : DATE_BOUNDS_TYPE) { + for (DataType toType : DATE_BOUNDS_TYPE) { + suppliers.add(new TestCaseSupplier(name, List.of(DataTypes.DATETIME, DataTypes.INTEGER, fromType, toType), () -> { + List args = new ArrayList<>(); + args.add(new TestCaseSupplier.TypedData(date.getAsLong(), DataTypes.DATETIME, "field")); + // TODO more "from" and "to" and "buckets" + args.add(new TestCaseSupplier.TypedData(50, DataTypes.INTEGER, "buckets").forceLiteral()); + args.add(dateBound("from", fromType, "2023-02-01T00:00:00.00Z")); + args.add(dateBound("to", toType, "2023-03-01T09:00:00.00Z")); + return new TestCaseSupplier.TestCase( + args, + "DateTruncEvaluator[fieldVal=Attribute[channel=0], rounding=Rounding[DAY_OF_MONTH in Z][fixed to midnight]]", + DataTypes.DATETIME, + dateResultsMatcher(args) + ); + })); + } } - return new Literal(Source.EMPTY, DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(date), DataTypes.DATETIME); } - @Override - protected DataType expectedType(List argTypes) { - if (argTypes.get(0).isNumeric()) { - return DataTypes.DOUBLE; + private static TestCaseSupplier.TypedData dateBound(String name, DataType type, String date) { + Object value; + if (type == DataTypes.DATETIME) { + value = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(date); + } else { + value = new BytesRef(date); } - return argTypes.get(0); + return new TestCaseSupplier.TypedData(value, type, name).forceLiteral(); } - private static Matcher dateResultsMatcher(List typedData) { - long millis = ((Number) typedData.get(0).data()).longValue(); - return equalTo(Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build().prepareForUnknown().round(millis)); + private static final DataType[] NUMBER_BOUNDS_TYPES = new DataType[] { DataTypes.INTEGER, DataTypes.LONG, DataTypes.DOUBLE }; + + private static void numberCases(List suppliers, String name, DataType numberType, Supplier number) { + for (DataType fromType : NUMBER_BOUNDS_TYPES) { + for (DataType toType : NUMBER_BOUNDS_TYPES) { + suppliers.add(new TestCaseSupplier(name, List.of(numberType, DataTypes.INTEGER, fromType, toType), () -> { + List args = new ArrayList<>(); + args.add(new TestCaseSupplier.TypedData(number.get(), "field")); + // TODO more "from" and "to" and "buckets" + args.add(new TestCaseSupplier.TypedData(50, DataTypes.INTEGER, "buckets").forceLiteral()); + args.add(numericBound("from", fromType, 0.0)); + args.add(numericBound("to", toType, 1000.0)); + // TODO more number types for "from" and "to" + String attr = "Attribute[channel=0]"; + if (numberType == DataTypes.INTEGER) { + attr = "CastIntToDoubleEvaluator[v=" + attr + "]"; + } else if (numberType == DataTypes.LONG) { + attr = "CastLongToDoubleEvaluator[v=" + attr + "]"; + } + return new TestCaseSupplier.TestCase( + args, + "MulDoublesEvaluator[lhs=FloorDoubleEvaluator[val=DivDoublesEvaluator[lhs=" + + attr + + ", " + + "rhs=LiteralsEvaluator[lit=50.0]]], rhs=LiteralsEvaluator[lit=50.0]]", + DataTypes.DOUBLE, + dateResultsMatcher(args) + ); + })); + } + } } - private static Matcher numericResultsMatcher(List typedData, Object value) { - return equalTo(value); + private static TestCaseSupplier.TypedData numericBound(String name, DataType type, double value) { + Number v; + if (type == DataTypes.INTEGER) { + v = (int) value; + } else if (type == DataTypes.LONG) { + v = (long) value; + } else { + v = value; + } + return new TestCaseSupplier.TypedData(v, type, name).forceLiteral(); } - @Override - protected List argSpec() { - DataType[] numerics = numerics(); - DataType[] all = new DataType[numerics.length + 1]; - all[0] = DataTypes.DATETIME; - System.arraycopy(numerics, 0, all, 1, numerics.length); - return List.of(required(all)); + private static Matcher dateResultsMatcher(List typedData) { + if (typedData.get(0).type() == DataTypes.DATETIME) { + long millis = ((Number) typedData.get(0).data()).longValue(); + return equalTo(Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build().prepareForUnknown().round(millis)); + } + return equalTo(((Number) typedData.get(0).data()).doubleValue()); } @Override protected Expression build(Source source, List args) { - return build(source, args.get(0)); + return new AutoBucket(source, args.get(0), args.get(1), args.get(2), args.get(3)); } @Override - protected Matcher badTypeError(List spec, int badArgPosition, DataType badArgType) { - return equalTo("first argument of [exp] must be [datetime or numeric], found value [arg0] type [" + badArgType.typeName() + "]"); + public void testSimpleWithNulls() { + assumeFalse("we test nulls in parameters", true); } }