From 39a2d17fe512bd13fb7c506c270d63244bd5eb1b Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Wed, 4 Dec 2024 10:17:56 +0100 Subject: [PATCH 01/33] test case --- .../testFixtures/src/main/resources/hash.csv-spec | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec new file mode 100644 index 000000000000..69f3d8ce648c --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec @@ -0,0 +1,13 @@ +md5 +ROW value = "test" | EVAL hash = hash("md5", value); + +value | hash +test | 098f6bcd4621d373cade4e832627b4f6 +; + +sha256 +ROW value = "test" | EVAL hash = hash("sha256", value); + +value | hash +test | 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08 +; From a9ff454ea834dc9836e682498ed6b505e3fc4a36 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Wed, 4 Dec 2024 12:29:52 +0100 Subject: [PATCH 02/33] dummy implementation --- .../src/main/resources/hash.csv-spec | 8 +- .../function/scalar/hash/HashEvaluator.java | 152 ++++++++++++++++++ .../function/EsqlFunctionRegistry.java | 9 +- .../scalar/ScalarFunctionWritables.java | 2 + .../expression/function/scalar/hash/Hash.java | 96 +++++++++++ 5 files changed, 261 insertions(+), 6 deletions(-) create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec index 69f3d8ce648c..c66e6dddf7b5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec @@ -1,13 +1,13 @@ md5 ROW value = "test" | EVAL hash = hash("md5", value); -value | hash -test | 098f6bcd4621d373cade4e832627b4f6 +value:keyword | hash:keyword +test | 098f6bcd4621d373cade4e832627b4f6 ; sha256 ROW value = "test" | EVAL hash = hash("sha256", value); -value | hash -test | 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08 +value:keyword | hash:keyword +test | 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08 ; diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java new file mode 100644 index 000000000000..97b985966ed7 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java @@ -0,0 +1,152 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. 
+package org.elasticsearch.xpack.esql.expression.function.scalar.hash; + +import java.lang.IllegalArgumentException; +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Hash}. + * This class is generated. Do not edit it. + */ +public final class HashEvaluator implements EvalOperator.ExpressionEvaluator { + private final Source source; + + private final EvalOperator.ExpressionEvaluator alg; + + private final EvalOperator.ExpressionEvaluator input; + + private final DriverContext driverContext; + + private Warnings warnings; + + public HashEvaluator(Source source, EvalOperator.ExpressionEvaluator alg, + EvalOperator.ExpressionEvaluator input, DriverContext driverContext) { + this.source = source; + this.alg = alg; + this.input = input; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (BytesRefBlock algBlock = (BytesRefBlock) alg.eval(page)) { + try (BytesRefBlock inputBlock = (BytesRefBlock) input.eval(page)) { + BytesRefVector algVector = algBlock.asVector(); + if (algVector == null) { + return eval(page.getPositionCount(), algBlock, inputBlock); + } + BytesRefVector inputVector = inputBlock.asVector(); + if (inputVector == null) { + return eval(page.getPositionCount(), algBlock, inputBlock); + } + return eval(page.getPositionCount(), algVector, inputVector).asBlock(); + } + } + } + + public BytesRefBlock eval(int positionCount, BytesRefBlock 
algBlock, BytesRefBlock inputBlock) { + try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { + BytesRef algScratch = new BytesRef(); + BytesRef inputScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + if (algBlock.isNull(p)) { + result.appendNull(); + continue position; + } + if (algBlock.getValueCount(p) != 1) { + if (algBlock.getValueCount(p) > 1) { + warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); + } + result.appendNull(); + continue position; + } + if (inputBlock.isNull(p)) { + result.appendNull(); + continue position; + } + if (inputBlock.getValueCount(p) != 1) { + if (inputBlock.getValueCount(p) > 1) { + warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); + } + result.appendNull(); + continue position; + } + result.appendBytesRef(Hash.process(algBlock.getBytesRef(algBlock.getFirstValueIndex(p), algScratch), inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); + } + return result.build(); + } + } + + public BytesRefVector eval(int positionCount, BytesRefVector algVector, + BytesRefVector inputVector) { + try(BytesRefVector.Builder result = driverContext.blockFactory().newBytesRefVectorBuilder(positionCount)) { + BytesRef algScratch = new BytesRef(); + BytesRef inputScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + result.appendBytesRef(Hash.process(algVector.getBytesRef(p, algScratch), inputVector.getBytesRef(p, inputScratch))); + } + return result.build(); + } + } + + @Override + public String toString() { + return "HashEvaluator[" + "alg=" + alg + ", input=" + input + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(alg, input); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + 
source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory alg; + + private final EvalOperator.ExpressionEvaluator.Factory input; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory alg, + EvalOperator.ExpressionEvaluator.Factory input) { + this.source = source; + this.alg = alg; + this.input = input; + } + + @Override + public HashEvaluator get(DriverContext context) { + return new HashEvaluator(source, alg.get(context), input.get(context), context); + } + + @Override + public String toString() { + return "HashEvaluator[" + "alg=" + alg + ", input=" + input + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 37b159922906..85f8c46a3864 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -67,6 +67,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateParse; import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc; import org.elasticsearch.xpack.esql.expression.function.scalar.date.Now; +import org.elasticsearch.xpack.esql.expression.function.scalar.hash.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.ip.CIDRMatch; import org.elasticsearch.xpack.esql.expression.function.scalar.ip.IpPrefix; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Abs; @@ -403,8 +404,12 @@ private static FunctionDefinition[][] functions() { def(MvSum.class, MvSum::new, "mv_sum"), 
def(Split.class, Split::new, "split") }, // fulltext functions - new FunctionDefinition[] { def(Match.class, Match::new, "match"), def(QueryString.class, QueryString::new, "qstr") } }; - + new FunctionDefinition[] { def(Match.class, Match::new, "match"), def(QueryString.class, QueryString::new, "qstr") }, + // hash + new FunctionDefinition[] { + def(Hash.class, Hash::new, "hash") + } + }; } private static FunctionDefinition[][] snapshotFunctions() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java index 192ca6c43e57..40c29c7ea388 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java @@ -20,6 +20,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateParse; import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc; import org.elasticsearch.xpack.esql.expression.function.scalar.date.Now; +import org.elasticsearch.xpack.esql.expression.function.scalar.hash.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.ip.CIDRMatch; import org.elasticsearch.xpack.esql.expression.function.scalar.ip.IpPrefix; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Atan2; @@ -92,6 +93,7 @@ public static List getNamedWriteables() { entries.add(Tau.ENTRY); entries.add(ToLower.ENTRY); entries.add(ToUpper.ENTRY); + entries.add(Hash.ENTRY); entries.addAll(GroupingWritables.getNamedWriteables()); return entries; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java new file mode 100644 index 000000000000..9b86b18c4583 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java @@ -0,0 +1,96 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.hash; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; + +import java.io.IOException; +import java.util.List; + +public class Hash extends EsqlScalarFunction { + + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Hash", Hash::new); + + private final Expression alg; + private final Expression input; + + @FunctionInfo( + returnType = "keyword", + description = "Computes the hash of the input keyword." 
+ ) + public Hash( + Source source, + @Param(name = "alg", type = { "keyword", "text" }, description = "Hash algorithm to use.") Expression alg, + @Param(name = "input", type = { "keyword", "text" }, description = "Input to hash.") Expression input + ) { + super(source, List.of(alg, input)); + this.alg = alg; + this.input = input; + } + + private Hash(StreamInput in) throws IOException { + this( + Source.readFrom((PlanStreamInput) in), + in.readNamedWriteable(Expression.class), + in.readNamedWriteable(Expression.class) + ); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteable(alg); + out.writeNamedWriteable(input); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + public DataType dataType() { + return DataType.KEYWORD; + } + + @Evaluator + static BytesRef process( + BytesRef alg, + BytesRef input + ) { + return input; + } + + @Override + public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + return new HashEvaluator.Factory(source(), toEvaluator.apply(alg), toEvaluator.apply(input)); + } + + @Override + public Expression replaceChildren(List newChildren) { + return new Hash(source(), newChildren.get(0), newChildren.get(1)); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Hash::new, children().get(0), children().get(1)); + } +} From 5101864c687804aff46932dcd8769409dfb2163e Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Wed, 4 Dec 2024 12:42:52 +0100 Subject: [PATCH 03/33] simple implementation --- .../function/scalar/hash/HashEvaluator.java | 21 ++++++++++++++----- .../expression/function/scalar/hash/Hash.java | 13 ++++++------ 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java 
b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java index 97b985966ed7..a814cff1f81d 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java @@ -7,6 +7,7 @@ import java.lang.IllegalArgumentException; import java.lang.Override; import java.lang.String; +import java.security.NoSuchAlgorithmException; import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BytesRefBlock; @@ -53,7 +54,7 @@ public Block eval(Page page) { if (inputVector == null) { return eval(page.getPositionCount(), algBlock, inputBlock); } - return eval(page.getPositionCount(), algVector, inputVector).asBlock(); + return eval(page.getPositionCount(), algVector, inputVector); } } } @@ -85,19 +86,29 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock algBlock, BytesRefBlo result.appendNull(); continue position; } - result.appendBytesRef(Hash.process(algBlock.getBytesRef(algBlock.getFirstValueIndex(p), algScratch), inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); + try { + result.appendBytesRef(Hash.process(algBlock.getBytesRef(algBlock.getFirstValueIndex(p), algScratch), inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); + } catch (NoSuchAlgorithmException e) { + warnings().registerException(e); + result.appendNull(); + } } return result.build(); } } - public BytesRefVector eval(int positionCount, BytesRefVector algVector, + public BytesRefBlock eval(int positionCount, BytesRefVector algVector, BytesRefVector inputVector) { - try(BytesRefVector.Builder result = driverContext.blockFactory().newBytesRefVectorBuilder(positionCount)) { + try(BytesRefBlock.Builder result = 
driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { BytesRef algScratch = new BytesRef(); BytesRef inputScratch = new BytesRef(); position: for (int p = 0; p < positionCount; p++) { - result.appendBytesRef(Hash.process(algVector.getBytesRef(p, algScratch), inputVector.getBytesRef(p, inputScratch))); + try { + result.appendBytesRef(Hash.process(algVector.getBytesRef(p, algScratch), inputVector.getBytesRef(p, inputScratch))); + } catch (NoSuchAlgorithmException e) { + warnings().registerException(e); + result.appendNull(); + } } return result.build(); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java index 9b86b18c4583..93a0e0e80fe9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java @@ -23,6 +23,9 @@ import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import java.io.IOException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.HexFormat; import java.util.List; public class Hash extends EsqlScalarFunction { @@ -71,12 +74,10 @@ public DataType dataType() { return DataType.KEYWORD; } - @Evaluator - static BytesRef process( - BytesRef alg, - BytesRef input - ) { - return input; + @Evaluator(warnExceptions = NoSuchAlgorithmException.class) + static BytesRef process(BytesRef alg, BytesRef input) throws NoSuchAlgorithmException { + byte[] digest = MessageDigest.getInstance(alg.utf8ToString()).digest(input.utf8ToString().getBytes()); + return new BytesRef(HexFormat.of().formatHex(digest)); } @Override From fabaabb31176a9c95e0dd5b3ccdacadd80385f59 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Wed, 4 Dec 2024 12:56:26 +0100 Subject: 
[PATCH 04/33] attempt to reuse MessageDigest --- .../scalar/hash/HashConstantEvaluator.java | 134 ++++++++++++++++++ .../expression/function/scalar/hash/Hash.java | 24 +++- 2 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashConstantEvaluator.java diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashConstantEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashConstantEvaluator.java new file mode 100644 index 000000000000..f175afbe5e6e --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashConstantEvaluator.java @@ -0,0 +1,134 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.hash; + +import java.lang.IllegalArgumentException; +import java.lang.Override; +import java.lang.String; +import java.security.MessageDigest; +import java.util.function.Function; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Hash}. + * This class is generated. Do not edit it. 
+ */ +public final class HashConstantEvaluator implements EvalOperator.ExpressionEvaluator { + private final Source source; + + private final MessageDigest alg; + + private final EvalOperator.ExpressionEvaluator input; + + private final DriverContext driverContext; + + private Warnings warnings; + + public HashConstantEvaluator(Source source, MessageDigest alg, + EvalOperator.ExpressionEvaluator input, DriverContext driverContext) { + this.source = source; + this.alg = alg; + this.input = input; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (BytesRefBlock inputBlock = (BytesRefBlock) input.eval(page)) { + BytesRefVector inputVector = inputBlock.asVector(); + if (inputVector == null) { + return eval(page.getPositionCount(), inputBlock); + } + return eval(page.getPositionCount(), inputVector).asBlock(); + } + } + + public BytesRefBlock eval(int positionCount, BytesRefBlock inputBlock) { + try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { + BytesRef inputScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + if (inputBlock.isNull(p)) { + result.appendNull(); + continue position; + } + if (inputBlock.getValueCount(p) != 1) { + if (inputBlock.getValueCount(p) > 1) { + warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); + } + result.appendNull(); + continue position; + } + result.appendBytesRef(Hash.processConstant(this.alg, inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); + } + return result.build(); + } + } + + public BytesRefVector eval(int positionCount, BytesRefVector inputVector) { + try(BytesRefVector.Builder result = driverContext.blockFactory().newBytesRefVectorBuilder(positionCount)) { + BytesRef inputScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + result.appendBytesRef(Hash.processConstant(this.alg, 
inputVector.getBytesRef(p, inputScratch))); + } + return result.build(); + } + } + + @Override + public String toString() { + return "HashConstantEvaluator[" + "alg=" + alg + ", input=" + input + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(input); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final Function alg; + + private final EvalOperator.ExpressionEvaluator.Factory input; + + public Factory(Source source, Function alg, + EvalOperator.ExpressionEvaluator.Factory input) { + this.source = source; + this.alg = alg; + this.input = input; + } + + @Override + public HashConstantEvaluator get(DriverContext context) { + return new HashConstantEvaluator(source, alg.apply(context), input.get(context), context); + } + + @Override + public String toString() { + return "HashConstantEvaluator[" + "alg=" + alg + ", input=" + input + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java index 93a0e0e80fe9..25428feb93ae 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java @@ -12,6 +12,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.compute.operator.EvalOperator; import 
org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -76,13 +77,30 @@ public DataType dataType() { @Evaluator(warnExceptions = NoSuchAlgorithmException.class) static BytesRef process(BytesRef alg, BytesRef input) throws NoSuchAlgorithmException { - byte[] digest = MessageDigest.getInstance(alg.utf8ToString()).digest(input.utf8ToString().getBytes()); - return new BytesRef(HexFormat.of().formatHex(digest)); + return hash(MessageDigest.getInstance(alg.utf8ToString()), input); + } + + @Evaluator(extraName = "Constant") + static BytesRef processConstant(@Fixed(build = true) MessageDigest alg, BytesRef input) { + return hash(alg, input); + } + + private static BytesRef hash(MessageDigest alg, BytesRef input) { + return new BytesRef(HexFormat.of().formatHex(alg.digest(input.utf8ToString().getBytes()))); } @Override public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { - return new HashEvaluator.Factory(source(), toEvaluator.apply(alg), toEvaluator.apply(input)); + if (alg.foldable() && alg.dataType() == DataType.KEYWORD) { + try { + var md = MessageDigest.getInstance(((BytesRef) alg.fold()).utf8ToString()); + return new HashConstantEvaluator.Factory(source(), context -> md, toEvaluator.apply(input)); + } catch (NoSuchAlgorithmException e) { + throw new IllegalArgumentException(e); + } + } else { + return new HashEvaluator.Factory(source(), toEvaluator.apply(alg), toEvaluator.apply(input)); + } } @Override From 33d54734771b8c1f280257b1e7c6d4db62abc014 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Wed, 4 Dec 2024 13:35:33 +0100 Subject: [PATCH 05/33] unit tests --- .../function/EsqlFunctionRegistry.java | 5 +- .../expression/function/scalar/hash/Hash.java | 16 ++-- .../function/scalar/hash/HashTests.java | 80 +++++++++++++++++++ 3 files changed, 88 insertions(+), 13 deletions(-) create mode 100644 
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 85f8c46a3864..b99f649eaafa 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -406,10 +406,7 @@ private static FunctionDefinition[][] functions() { // fulltext functions new FunctionDefinition[] { def(Match.class, Match::new, "match"), def(QueryString.class, QueryString::new, "qstr") }, // hash - new FunctionDefinition[] { - def(Hash.class, Hash::new, "hash") - } - }; + new FunctionDefinition[] { def(Hash.class, Hash::new, "hash") } }; } private static FunctionDefinition[][] snapshotFunctions() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java index 25428feb93ae..af763e724ec7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java @@ -36,10 +36,7 @@ public class Hash extends EsqlScalarFunction { private final Expression alg; private final Expression input; - @FunctionInfo( - returnType = "keyword", - description = "Computes the hash of the input keyword." 
- ) + @FunctionInfo(returnType = "keyword", description = "Computes the hash of the input keyword.") public Hash( Source source, @Param(name = "alg", type = { "keyword", "text" }, description = "Hash algorithm to use.") Expression alg, @@ -51,11 +48,7 @@ public Hash( } private Hash(StreamInput in) throws IOException { - this( - Source.readFrom((PlanStreamInput) in), - in.readNamedWriteable(Expression.class), - in.readNamedWriteable(Expression.class) - ); + this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(Expression.class)); } @Override @@ -75,6 +68,11 @@ public DataType dataType() { return DataType.KEYWORD; } + @Override + public boolean foldable() { + return alg.foldable() && input.foldable(); + } + @Evaluator(warnExceptions = NoSuchAlgorithmException.class) static BytesRef process(BytesRef alg, BytesRef input) throws NoSuchAlgorithmException { return hash(MessageDigest.getInstance(alg.utf8ToString()), input); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java new file mode 100644 index 000000000000..86b4df5b0d04 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.hash; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.HexFormat; +import java.util.List; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; + +public class HashTests extends AbstractScalarFunctionTestCase { + + public HashTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + List cases = new ArrayList<>(); + for (String alg : List.of("MD5", "SHA", "SHA-224", "SHA-256", "SHA-384", "SHA-512")) { + cases.addAll(createTestCases(alg)); + } + return parameterSuppliersFromTypedData(cases); + } + + private static List createTestCases(String alg) { + return List.of( + createTestCase(alg, DataType.KEYWORD, DataType.KEYWORD), + createTestCase(alg, DataType.KEYWORD, DataType.TEXT), + createTestCase(alg, DataType.TEXT, DataType.KEYWORD), + createTestCase(alg, DataType.TEXT, DataType.TEXT) + ); + } + + private static TestCaseSupplier createTestCase(String alg, DataType algType, DataType inputType) { + return new TestCaseSupplier(alg, List.of(algType, inputType), () -> { + var input = randomAlphaOfLength(10); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(new BytesRef(alg), algType, "alg"), + new TestCaseSupplier.TypedData(input, 
inputType, "input") + ), + "HashEvaluator[alg=Attribute[channel=0], input=Attribute[channel=1]]", + DataType.KEYWORD, + equalTo(new BytesRef(hash(alg, input))) + ); + }); + } + + private static String hash(String alg, String input) { + try { + return HexFormat.of().formatHex(MessageDigest.getInstance(alg).digest(input.getBytes())); + } catch (NoSuchAlgorithmException e) { + throw new IllegalArgumentException("Unknown algorithm: " + alg); + } + } + + @Override + protected Expression build(Source source, List args) { + return new Hash(source, args.get(0), args.get(1)); + } +} From 06a322a54cb13486cd0275bf6b5a22cdb7cf16d9 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Wed, 4 Dec 2024 13:53:38 +0100 Subject: [PATCH 06/33] update description --- .../esql/functions/description/hash.asciidoc | 5 ++ .../functions/kibana/definition/hash.json | 82 +++++++++++++++++++ .../esql/functions/kibana/docs/hash.md | 7 ++ .../esql/functions/layout/hash.asciidoc | 14 ++++ .../esql/functions/parameters/hash.asciidoc | 9 ++ .../esql/functions/signature/hash.svg | 1 + .../esql/functions/types/hash.asciidoc | 12 +++ .../expression/function/scalar/hash/Hash.java | 2 +- 8 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 docs/reference/esql/functions/description/hash.asciidoc create mode 100644 docs/reference/esql/functions/kibana/definition/hash.json create mode 100644 docs/reference/esql/functions/kibana/docs/hash.md create mode 100644 docs/reference/esql/functions/layout/hash.asciidoc create mode 100644 docs/reference/esql/functions/parameters/hash.asciidoc create mode 100644 docs/reference/esql/functions/signature/hash.svg create mode 100644 docs/reference/esql/functions/types/hash.asciidoc diff --git a/docs/reference/esql/functions/description/hash.asciidoc b/docs/reference/esql/functions/description/hash.asciidoc new file mode 100644 index 000000000000..b9a2b14f5ea5 --- /dev/null +++ b/docs/reference/esql/functions/description/hash.asciidoc @@ -0,0 +1,5 @@ 
+// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Computes the hash of the input using the supplied algorithm. diff --git a/docs/reference/esql/functions/kibana/definition/hash.json b/docs/reference/esql/functions/kibana/definition/hash.json new file mode 100644 index 000000000000..f10f901a0d75 --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/hash.json @@ -0,0 +1,82 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "eval", + "name" : "hash", + "description" : "Computes the hash of the input using the supplied algorithm.", + "signatures" : [ + { + "params" : [ + { + "name" : "alg", + "type" : "keyword", + "optional" : false, + "description" : "Hash algorithm to use." + }, + { + "name" : "input", + "type" : "keyword", + "optional" : false, + "description" : "Input to hash." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "alg", + "type" : "keyword", + "optional" : false, + "description" : "Hash algorithm to use." + }, + { + "name" : "input", + "type" : "text", + "optional" : false, + "description" : "Input to hash." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "alg", + "type" : "text", + "optional" : false, + "description" : "Hash algorithm to use." + }, + { + "name" : "input", + "type" : "keyword", + "optional" : false, + "description" : "Input to hash." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "alg", + "type" : "text", + "optional" : false, + "description" : "Hash algorithm to use." + }, + { + "name" : "input", + "type" : "text", + "optional" : false, + "description" : "Input to hash." 
+ } + ], + "variadic" : false, + "returnType" : "keyword" + } + ], + "preview" : false, + "snapshot_only" : false +} diff --git a/docs/reference/esql/functions/kibana/docs/hash.md b/docs/reference/esql/functions/kibana/docs/hash.md new file mode 100644 index 000000000000..23cd3f31b0a7 --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/hash.md @@ -0,0 +1,7 @@ + + +### HASH +Computes the hash of the input using the supplied algorithm. + diff --git a/docs/reference/esql/functions/layout/hash.asciidoc b/docs/reference/esql/functions/layout/hash.asciidoc new file mode 100644 index 000000000000..27c55ada6319 --- /dev/null +++ b/docs/reference/esql/functions/layout/hash.asciidoc @@ -0,0 +1,14 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-hash]] +=== `HASH` + +*Syntax* + +[.text-center] +image::esql/functions/signature/hash.svg[Embedded,opts=inline] + +include::../parameters/hash.asciidoc[] +include::../description/hash.asciidoc[] +include::../types/hash.asciidoc[] diff --git a/docs/reference/esql/functions/parameters/hash.asciidoc b/docs/reference/esql/functions/parameters/hash.asciidoc new file mode 100644 index 000000000000..cabffe51f7fe --- /dev/null +++ b/docs/reference/esql/functions/parameters/hash.asciidoc @@ -0,0 +1,9 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`alg`:: +Hash algorithm to use. + +`input`:: +Input to hash. 
diff --git a/docs/reference/esql/functions/signature/hash.svg b/docs/reference/esql/functions/signature/hash.svg new file mode 100644 index 000000000000..2b6b1088ed56 --- /dev/null +++ b/docs/reference/esql/functions/signature/hash.svg @@ -0,0 +1 @@ +HASH(alg,input) \ No newline at end of file diff --git a/docs/reference/esql/functions/types/hash.asciidoc b/docs/reference/esql/functions/types/hash.asciidoc new file mode 100644 index 000000000000..9d8a18108467 --- /dev/null +++ b/docs/reference/esql/functions/types/hash.asciidoc @@ -0,0 +1,12 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +alg | input | result +keyword | keyword | keyword +keyword | text | keyword +text | keyword | keyword +text | text | keyword +|=== diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java index af763e724ec7..2311af2cf7cd 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java @@ -36,7 +36,7 @@ public class Hash extends EsqlScalarFunction { private final Expression alg; private final Expression input; - @FunctionInfo(returnType = "keyword", description = "Computes the hash of the input keyword.") + @FunctionInfo(returnType = "keyword", description = "Computes the hash of the input using the supplied algorithm.") public Hash( Source source, @Param(name = "alg", type = { "keyword", "text" }, description = "Hash algorithm to use.") Expression alg, From 2c839256339bb1fd7c510f26ba379f2ba3406986 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Wed, 4 Dec 2024 14:11:22 +0100 Subject: [PATCH 07/33] 
replace getBytes usage --- .../xpack/esql/expression/function/scalar/hash/Hash.java | 4 +++- .../esql/expression/function/scalar/hash/HashTests.java | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java index 2311af2cf7cd..78575503f9ec 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java @@ -84,7 +84,9 @@ static BytesRef processConstant(@Fixed(build = true) MessageDigest alg, BytesRef } private static BytesRef hash(MessageDigest alg, BytesRef input) { - return new BytesRef(HexFormat.of().formatHex(alg.digest(input.utf8ToString().getBytes()))); + alg.update(input.bytes, input.offset, input.length); + var result = alg.digest(); + return new BytesRef(HexFormat.of().formatHex(result)); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java index 86b4df5b0d04..9474013d698f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; @@ -56,7 +57,7 @@ private static TestCaseSupplier 
createTestCase(String alg, DataType algType, Dat return new TestCaseSupplier.TestCase( List.of( new TestCaseSupplier.TypedData(new BytesRef(alg), algType, "alg"), - new TestCaseSupplier.TypedData(input, inputType, "input") + new TestCaseSupplier.TypedData(new BytesRef(input), inputType, "input") ), "HashEvaluator[alg=Attribute[channel=0], input=Attribute[channel=1]]", DataType.KEYWORD, @@ -67,7 +68,7 @@ private static TestCaseSupplier createTestCase(String alg, DataType algType, Dat private static String hash(String alg, String input) { try { - return HexFormat.of().formatHex(MessageDigest.getInstance(alg).digest(input.getBytes())); + return HexFormat.of().formatHex(MessageDigest.getInstance(alg).digest(input.getBytes(StandardCharsets.UTF_8))); } catch (NoSuchAlgorithmException e) { throw new IllegalArgumentException("Unknown algorithm: " + alg); } From 644874f41e466eacd208e878c4c21b94db986798 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Wed, 4 Dec 2024 14:34:08 +0100 Subject: [PATCH 08/33] introduce capability --- .../esql/qa/testFixtures/src/main/resources/hash.csv-spec | 4 ++++ .../elasticsearch/xpack/esql/action/EsqlCapabilities.java | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec index c66e6dddf7b5..3f4a96133533 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec @@ -1,4 +1,6 @@ md5 +required_capability: kql_function + ROW value = "test" | EVAL hash = hash("md5", value); value:keyword | hash:keyword @@ -6,6 +8,8 @@ test | 098f6bcd4621d373cade4e832627b4f6 ; sha256 +required_capability: kql_function + ROW value = "test" | EVAL hash = hash("sha256", value); value:keyword | hash:keyword diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 4845c7061949..1fa4afb684ca 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -424,6 +424,11 @@ public enum Cap { */ KQL_FUNCTION(Build.current().isSnapshot()), + /** + * Hash function + */ + HASH_FUNCTION(Build.current().isSnapshot()), + /** * Don't optimize CASE IS NOT NULL function by not requiring the fields to be not null as well. * https://github.com/elastic/elasticsearch/issues/112704 From b324f39b429a3d639f7a02f8a09155a624e80f85 Mon Sep 17 00:00:00 2001 From: Ievgen Degtiarenko Date: Wed, 4 Dec 2024 14:41:18 +0100 Subject: [PATCH 09/33] Update docs/changelog/117989.yaml --- docs/changelog/117989.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/117989.yaml diff --git a/docs/changelog/117989.yaml b/docs/changelog/117989.yaml new file mode 100644 index 000000000000..e4967141b3eb --- /dev/null +++ b/docs/changelog/117989.yaml @@ -0,0 +1,5 @@ +pr: 117989 +summary: ESQL Add esql hash function +area: ES|QL +type: enhancement +issues: [] From 514e1dbbd149c168d544d2f12caf06889fc91531 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Wed, 4 Dec 2024 15:38:51 +0100 Subject: [PATCH 10/33] update functions counter --- .../yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index c23b44c00bd1..932241da2c53 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -92,7 +92,7 @@ setup: - gt: {esql.functions.to_long: $functions_to_long} - 
match: {esql.functions.coalesce: $functions_coalesce} # Testing for the entire function set isn't feasbile, so we just check that we return the correct count as an approximation. - - length: {esql.functions: 122} # check the "sister" test below for a likely update to the same esql.functions length check + - length: {esql.functions: 123} # check the "sister" test below for a likely update to the same esql.functions length check --- "Basic ESQL usage output (telemetry) non-snapshot version": From 052c247fe93219101245201e0194139a235cac47 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Wed, 4 Dec 2024 16:37:27 +0100 Subject: [PATCH 11/33] fix required capability --- .../esql/qa/testFixtures/src/main/resources/hash.csv-spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec index 3f4a96133533..556468ad35dd 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec @@ -1,5 +1,5 @@ md5 -required_capability: kql_function +required_capability: hash_function ROW value = "test" | EVAL hash = hash("md5", value); @@ -8,7 +8,7 @@ test | 098f6bcd4621d373cade4e832627b4f6 ; sha256 -required_capability: kql_function +required_capability: hash_function ROW value = "test" | EVAL hash = hash("sha256", value); From 1adfc890dad43cec21c1dc85cccb5f22658383fa Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Thu, 5 Dec 2024 10:08:45 +0100 Subject: [PATCH 12/33] additional test cases --- .../expression/function/scalar/hash/Hash.java | 18 ++++++++++++++++++ .../function/scalar/hash/HashTests.java | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java index 78575503f9ec..7826fb6479c1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java @@ -29,6 +29,10 @@ import java.util.HexFormat; import java.util.List; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; + public class Hash extends EsqlScalarFunction { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Hash", Hash::new); @@ -68,6 +72,20 @@ public DataType dataType() { return DataType.KEYWORD; } + @Override + protected TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + + TypeResolution resolution = isString(alg, sourceText(), FIRST); + if (resolution.unresolved()) { + return resolution; + } + + return isString(input, sourceText(), SECOND); + } + @Override public boolean foldable() { return alg.foldable() && input.foldable(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java index 9474013d698f..3185145dc4cc 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java @@ -39,7 +39,7 @@ public static Iterable parameters() { for (String alg : List.of("MD5", "SHA", "SHA-224", "SHA-256", 
"SHA-384", "SHA-512")) { cases.addAll(createTestCases(alg)); } - return parameterSuppliersFromTypedData(cases); + return parameterSuppliersFromTypedDataWithDefaultChecks(true, cases, (v, p) -> "string"); } private static List createTestCases(String alg) { From 633a044791c7132d257fa255dd978783709e9a47 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Thu, 5 Dec 2024 10:39:48 +0100 Subject: [PATCH 13/33] update spec --- .../yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index 50c80e0ff3e6..81f65668722f 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -163,4 +163,4 @@ setup: - match: {esql.functions.cos: $functions_cos} - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} - - length: {esql.functions: 123} # check the "sister" test above for a likely update to the same esql.functions length check + - length: {esql.functions: 124} # check the "sister" test above for a likely update to the same esql.functions length check From f0630556c759f92f6d55a65e869d77e57b29f3f0 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Thu, 5 Dec 2024 10:42:28 +0100 Subject: [PATCH 14/33] move function definition --- .../esql/expression/function/EsqlFunctionRegistry.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 0536fafa9212..454bec27570b 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -340,7 +340,8 @@ private static FunctionDefinition[][] functions() { def(Substring.class, Substring::new, "substring"), def(ToLower.class, ToLower::new, "to_lower"), def(ToUpper.class, ToUpper::new, "to_upper"), - def(Trim.class, Trim::new, "trim") }, + def(Trim.class, Trim::new, "trim"), + def(Hash.class, Hash::new, "hash") }, // date new FunctionDefinition[] { def(DateDiff.class, DateDiff::new, "date_diff"), @@ -414,9 +415,7 @@ private static FunctionDefinition[][] functions() { def(MvSum.class, MvSum::new, "mv_sum"), def(Split.class, Split::new, "split") }, // fulltext functions - new FunctionDefinition[] { def(Match.class, Match::new, "match"), def(QueryString.class, QueryString::new, "qstr") }, - // hash - new FunctionDefinition[] { def(Hash.class, Hash::new, "hash") } }; + new FunctionDefinition[] { def(Match.class, Match::new, "match"), def(QueryString.class, QueryString::new, "qstr") } }; } private static FunctionDefinition[][] snapshotFunctions() { From 062967f7e402785eebbf62602539f7d79ecd8f90 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Thu, 5 Dec 2024 11:28:06 +0100 Subject: [PATCH 15/33] update --- .../src/main/resources/hash.csv-spec | 22 +++++------ .../scalar/hash/HashConstantEvaluator.java | 20 +++++++--- .../function/scalar/hash/HashEvaluator.java | 24 ++++++++---- .../expression/function/scalar/hash/Hash.java | 38 ++++++++++++++----- 4 files changed, 69 insertions(+), 35 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec index 556468ad35dd..50a4309dae8a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec @@ -1,17 +1,13 @@ -md5 +hash required_capability: hash_function -ROW value = "test" | EVAL hash = hash("md5", value); +FROM languages +| EVAL md5 = hash("md5", language_name), sha256 = hash("sha256", language_name) +| KEEP language_name, md5, sha256; -value:keyword | hash:keyword -test | 098f6bcd4621d373cade4e832627b4f6 -; - -sha256 -required_capability: hash_function - -ROW value = "test" | EVAL hash = hash("sha256", value); - -value:keyword | hash:keyword -test | 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08 +language_name:keyword | md5:keyword | sha256:keyword +English | 78463a384a5aa4fad5fa73e2f506ecfc | ba118bf7fc9c1aedc1edb28a0aa86e0b43b681f222af6616e13c43be87815b06 +French | ad225f707802ba118c22987186dd38e8 | 7458199fe97a184002bfd3c42bae81371f0ae2229b5c0a257a9bae77e4f4eda8 +Spanish | cb5480c32e71778852b08ae1e8712775 | 3411059cb8e0660e29dd7a3737e65a28b08eb01524a8ebc3d4168932649f23e6 +German | 86bc3115eb4e9873ac96904a4a68e19e | a659b60d246dce8d4865d45673963de83d0e49a57e8559da98581c2bc5d7d97f ; diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashConstantEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashConstantEvaluator.java index f175afbe5e6e..512b5d93e228 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashConstantEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashConstantEvaluator.java @@ -14,6 +14,7 @@ import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.BytesRefVector; import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; import org.elasticsearch.compute.operator.DriverContext; import 
org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.compute.operator.Warnings; @@ -27,6 +28,8 @@ public final class HashConstantEvaluator implements EvalOperator.ExpressionEvaluator { private final Source source; + private final BreakingBytesRefBuilder scratch; + private final MessageDigest alg; private final EvalOperator.ExpressionEvaluator input; @@ -35,9 +38,10 @@ public final class HashConstantEvaluator implements EvalOperator.ExpressionEvalu private Warnings warnings; - public HashConstantEvaluator(Source source, MessageDigest alg, + public HashConstantEvaluator(Source source, BreakingBytesRefBuilder scratch, MessageDigest alg, EvalOperator.ExpressionEvaluator input, DriverContext driverContext) { this.source = source; + this.scratch = scratch; this.alg = alg; this.input = input; this.driverContext = driverContext; @@ -69,7 +73,7 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock inputBlock) { result.appendNull(); continue position; } - result.appendBytesRef(Hash.processConstant(this.alg, inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); + result.appendBytesRef(Hash.processConstant(this.scratch, this.alg, inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); } return result.build(); } @@ -79,7 +83,7 @@ public BytesRefVector eval(int positionCount, BytesRefVector inputVector) { try(BytesRefVector.Builder result = driverContext.blockFactory().newBytesRefVectorBuilder(positionCount)) { BytesRef inputScratch = new BytesRef(); position: for (int p = 0; p < positionCount; p++) { - result.appendBytesRef(Hash.processConstant(this.alg, inputVector.getBytesRef(p, inputScratch))); + result.appendBytesRef(Hash.processConstant(this.scratch, this.alg, inputVector.getBytesRef(p, inputScratch))); } return result.build(); } @@ -92,7 +96,7 @@ public String toString() { @Override public void close() { - Releasables.closeExpectNoException(input); + Releasables.closeExpectNoException(scratch, input); } 
private Warnings warnings() { @@ -110,20 +114,24 @@ private Warnings warnings() { static class Factory implements EvalOperator.ExpressionEvaluator.Factory { private final Source source; + private final Function scratch; + private final Function alg; private final EvalOperator.ExpressionEvaluator.Factory input; - public Factory(Source source, Function alg, + public Factory(Source source, Function scratch, + Function alg, EvalOperator.ExpressionEvaluator.Factory input) { this.source = source; + this.scratch = scratch; this.alg = alg; this.input = input; } @Override public HashConstantEvaluator get(DriverContext context) { - return new HashConstantEvaluator(source, alg.apply(context), input.get(context), context); + return new HashConstantEvaluator(source, scratch.apply(context), alg.apply(context), input.get(context), context); } @Override diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java index a814cff1f81d..e283b445f39f 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java @@ -8,11 +8,13 @@ import java.lang.Override; import java.lang.String; import java.security.NoSuchAlgorithmException; +import java.util.function.Function; import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.BytesRefVector; import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; import 
org.elasticsearch.compute.operator.Warnings; @@ -26,6 +28,8 @@ public final class HashEvaluator implements EvalOperator.ExpressionEvaluator { private final Source source; + private final BreakingBytesRefBuilder scratch; + private final EvalOperator.ExpressionEvaluator alg; private final EvalOperator.ExpressionEvaluator input; @@ -34,9 +38,11 @@ public final class HashEvaluator implements EvalOperator.ExpressionEvaluator { private Warnings warnings; - public HashEvaluator(Source source, EvalOperator.ExpressionEvaluator alg, - EvalOperator.ExpressionEvaluator input, DriverContext driverContext) { + public HashEvaluator(Source source, BreakingBytesRefBuilder scratch, + EvalOperator.ExpressionEvaluator alg, EvalOperator.ExpressionEvaluator input, + DriverContext driverContext) { this.source = source; + this.scratch = scratch; this.alg = alg; this.input = input; this.driverContext = driverContext; @@ -87,7 +93,7 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock algBlock, BytesRefBlo continue position; } try { - result.appendBytesRef(Hash.process(algBlock.getBytesRef(algBlock.getFirstValueIndex(p), algScratch), inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); + result.appendBytesRef(Hash.process(this.scratch, algBlock.getBytesRef(algBlock.getFirstValueIndex(p), algScratch), inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); } catch (NoSuchAlgorithmException e) { warnings().registerException(e); result.appendNull(); @@ -104,7 +110,7 @@ public BytesRefBlock eval(int positionCount, BytesRefVector algVector, BytesRef inputScratch = new BytesRef(); position: for (int p = 0; p < positionCount; p++) { try { - result.appendBytesRef(Hash.process(algVector.getBytesRef(p, algScratch), inputVector.getBytesRef(p, inputScratch))); + result.appendBytesRef(Hash.process(this.scratch, algVector.getBytesRef(p, algScratch), inputVector.getBytesRef(p, inputScratch))); } catch (NoSuchAlgorithmException e) { 
warnings().registerException(e); result.appendNull(); @@ -121,7 +127,7 @@ public String toString() { @Override public void close() { - Releasables.closeExpectNoException(alg, input); + Releasables.closeExpectNoException(scratch, alg, input); } private Warnings warnings() { @@ -139,20 +145,24 @@ private Warnings warnings() { static class Factory implements EvalOperator.ExpressionEvaluator.Factory { private final Source source; + private final Function scratch; + private final EvalOperator.ExpressionEvaluator.Factory alg; private final EvalOperator.ExpressionEvaluator.Factory input; - public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory alg, + public Factory(Source source, Function scratch, + EvalOperator.ExpressionEvaluator.Factory alg, EvalOperator.ExpressionEvaluator.Factory input) { this.source = source; + this.scratch = scratch; this.alg = alg; this.input = input; } @Override public HashEvaluator get(DriverContext context) { - return new HashEvaluator(source, alg.get(context), input.get(context), context); + return new HashEvaluator(source, scratch.apply(context), alg.get(context), input.get(context), context); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java index 7826fb6479c1..29e267f91ccf 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.ann.Fixed; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.xpack.esql.core.expression.Expression; 
import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -92,19 +93,28 @@ public boolean foldable() { } @Evaluator(warnExceptions = NoSuchAlgorithmException.class) - static BytesRef process(BytesRef alg, BytesRef input) throws NoSuchAlgorithmException { - return hash(MessageDigest.getInstance(alg.utf8ToString()), input); + static BytesRef process(@Fixed(includeInToString = false, build = true) BreakingBytesRefBuilder scratch, BytesRef alg, BytesRef input) + throws NoSuchAlgorithmException { + return hash(scratch, MessageDigest.getInstance(alg.utf8ToString()), input); } @Evaluator(extraName = "Constant") - static BytesRef processConstant(@Fixed(build = true) MessageDigest alg, BytesRef input) { - return hash(alg, input); + static BytesRef processConstant( + @Fixed(includeInToString = false, build = true) BreakingBytesRefBuilder scratch, + @Fixed(build = true) MessageDigest alg, + BytesRef input + ) { + return hash(scratch, alg, input); } - private static BytesRef hash(MessageDigest alg, BytesRef input) { + private static BytesRef hash(BreakingBytesRefBuilder scratch, MessageDigest alg, BytesRef input) { + alg.reset(); alg.update(input.bytes, input.offset, input.length); - var result = alg.digest(); - return new BytesRef(HexFormat.of().formatHex(result)); + var digest = alg.digest(); + scratch.clear(); + scratch.grow(digest.length * 2); + scratch.append(new BytesRef(HexFormat.of().formatHex(digest))); + return scratch.bytesRefView(); } @Override @@ -112,12 +122,22 @@ public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvalua if (alg.foldable() && alg.dataType() == DataType.KEYWORD) { try { var md = MessageDigest.getInstance(((BytesRef) alg.fold()).utf8ToString()); - return new HashConstantEvaluator.Factory(source(), context -> md, toEvaluator.apply(input)); + return new HashConstantEvaluator.Factory( + source(), + context -> new BreakingBytesRefBuilder(context.breaker(), "hash"), + context -> md, + toEvaluator.apply(input) + ); } catch 
(NoSuchAlgorithmException e) { throw new IllegalArgumentException(e); } } else { - return new HashEvaluator.Factory(source(), toEvaluator.apply(alg), toEvaluator.apply(input)); + return new HashEvaluator.Factory( + source(), + context -> new BreakingBytesRefBuilder(context.breaker(), "hash"), + toEvaluator.apply(alg), + toEvaluator.apply(input) + ); } } From 62f97c3d530e81d39b0dc5145bb6db48bbc94f22 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 6 Dec 2024 08:52:48 +0100 Subject: [PATCH 16/33] enable by default --- .../org/elasticsearch/xpack/esql/action/EsqlCapabilities.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 7d1bd3da942c..e05de9424946 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -432,7 +432,7 @@ public enum Cap { /** * Hash function */ - HASH_FUNCTION(Build.current().isSnapshot()), + HASH_FUNCTION, /** * Don't optimize CASE IS NOT NULL function by not requiring the fields to be not null as well. 
From 923f45329bd4f60c7f8b02d61c0b7bb86c52da8c Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 6 Dec 2024 10:35:25 +0100 Subject: [PATCH 17/33] handle error cases --- .../esql/functions/description/hash.asciidoc | 2 +- .../functions/kibana/definition/hash.json | 2 +- .../esql/functions/kibana/docs/hash.md | 2 +- .../src/main/resources/hash.csv-spec | 1 + .../expression/function/scalar/hash/Hash.java | 7 ++-- .../function/scalar/hash/HashTests.java | 32 +++++++++++++++++++ 6 files changed, 40 insertions(+), 6 deletions(-) diff --git a/docs/reference/esql/functions/description/hash.asciidoc b/docs/reference/esql/functions/description/hash.asciidoc index b9a2b14f5ea5..8521baedc0b3 100644 --- a/docs/reference/esql/functions/description/hash.asciidoc +++ b/docs/reference/esql/functions/description/hash.asciidoc @@ -2,4 +2,4 @@ *Description* -Computes the hash of the input using the supplied algorithm. +Computes the hash of the input using java.security.MessageDigest. diff --git a/docs/reference/esql/functions/kibana/definition/hash.json b/docs/reference/esql/functions/kibana/definition/hash.json index f10f901a0d75..fd27124cf08e 100644 --- a/docs/reference/esql/functions/kibana/definition/hash.json +++ b/docs/reference/esql/functions/kibana/definition/hash.json @@ -2,7 +2,7 @@ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. 
See ../README.md for how to regenerate it.", "type" : "eval", "name" : "hash", - "description" : "Computes the hash of the input using the supplied algorithm.", + "description" : "Computes the hash of the input using java.security.MessageDigest.", "signatures" : [ { "params" : [ diff --git a/docs/reference/esql/functions/kibana/docs/hash.md b/docs/reference/esql/functions/kibana/docs/hash.md index 23cd3f31b0a7..8086299d5e0a 100644 --- a/docs/reference/esql/functions/kibana/docs/hash.md +++ b/docs/reference/esql/functions/kibana/docs/hash.md @@ -3,5 +3,5 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ --> ### HASH -Computes the hash of the input using the supplied algorithm. +Computes the hash of the input using java.security.MessageDigest. diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec index 50a4309dae8a..e911b34eb176 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec @@ -4,6 +4,7 @@ required_capability: hash_function FROM languages | EVAL md5 = hash("md5", language_name), sha256 = hash("sha256", language_name) | KEEP language_name, md5, sha256; +ignoreOrder:true language_name:keyword | md5:keyword | sha256:keyword English | 78463a384a5aa4fad5fa73e2f506ecfc | ba118bf7fc9c1aedc1edb28a0aa86e0b43b681f222af6616e13c43be87815b06 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java index 29e267f91ccf..bb19b498585c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java @@ -15,6 +15,7 @@ import 
org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -41,7 +42,7 @@ public class Hash extends EsqlScalarFunction { private final Expression alg; private final Expression input; - @FunctionInfo(returnType = "keyword", description = "Computes the hash of the input using the supplied algorithm.") + @FunctionInfo(returnType = "keyword", description = "Computes the hash of the input using java.security.MessageDigest.") public Hash( Source source, @Param(name = "alg", type = { "keyword", "text" }, description = "Hash algorithm to use.") Expression alg, @@ -119,7 +120,7 @@ private static BytesRef hash(BreakingBytesRefBuilder scratch, MessageDigest alg, @Override public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { - if (alg.foldable() && alg.dataType() == DataType.KEYWORD) { + if (alg.foldable()) { try { var md = MessageDigest.getInstance(((BytesRef) alg.fold()).utf8ToString()); return new HashConstantEvaluator.Factory( @@ -129,7 +130,7 @@ public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvalua toEvaluator.apply(input) ); } catch (NoSuchAlgorithmException e) { - throw new IllegalArgumentException(e); + throw new InvalidArgumentException(e, "invalid alg for [{}]: {}", sourceText(), e.getMessage()); } } else { return new HashEvaluator.Factory( diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java index 3185145dc4cc..59fc12514332 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java @@ -11,7 +11,10 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; @@ -26,6 +29,9 @@ import java.util.function.Supplier; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.startsWith; public class HashTests extends AbstractScalarFunctionTestCase { @@ -39,6 +45,20 @@ public static Iterable parameters() { for (String alg : List.of("MD5", "SHA", "SHA-224", "SHA-256", "SHA-384", "SHA-512")) { cases.addAll(createTestCases(alg)); } + cases.add(new TestCaseSupplier("Invalid alg", List.of(DataType.KEYWORD, DataType.KEYWORD), () -> { + var input = randomAlphaOfLength(10); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(new BytesRef("invalid"), DataType.KEYWORD, "alg"), + new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "input") + ), + "HashEvaluator[alg=Attribute[channel=0], input=Attribute[channel=1]]", + DataType.KEYWORD, + is(nullValue()) + ).withWarning("Line -1:-1: evaluation of [] failed, treating result as null. 
Only first 20 failures recorded.") + .withWarning("Line -1:-1: java.security.NoSuchAlgorithmException: invalid MessageDigest not available") + .withFoldingException(InvalidArgumentException.class, "invalid alg for []: invalid MessageDigest not available"); + })); return parameterSuppliersFromTypedDataWithDefaultChecks(true, cases, (v, p) -> "string"); } @@ -78,4 +98,16 @@ private static String hash(String alg, String input) { protected Expression build(Source source, List args) { return new Hash(source, args.get(0), args.get(1)); } + + public void testInvalidAlgLiteral() { + Source source = new Source(0, 0, "hast(\"invalid\", input)"); + DriverContext driverContext = driverContext(); + InvalidArgumentException e = expectThrows( + InvalidArgumentException.class, + () -> evaluator( + new Hash(source, new Literal(source, new BytesRef("invalid"), DataType.KEYWORD), field("str", DataType.KEYWORD)) + ).get(driverContext) + ); + assertThat(e.getMessage(), startsWith("invalid alg for [hast(\"invalid\", input)]: invalid MessageDigest not available")); + } } From 9863fcc9877e339f522fad3b3f43fc353410665d Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 6 Dec 2024 12:19:01 +0100 Subject: [PATCH 18/33] make formatting a bit cheaper --- .../expression/function/scalar/hash/Hash.java | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java index bb19b498585c..cde1e18457fc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java @@ -28,7 +28,6 @@ import java.io.IOException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import 
java.util.HexFormat; import java.util.List; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; @@ -114,10 +113,23 @@ private static BytesRef hash(BreakingBytesRefBuilder scratch, MessageDigest alg, var digest = alg.digest(); scratch.clear(); scratch.grow(digest.length * 2); - scratch.append(new BytesRef(HexFormat.of().formatHex(digest))); + appendUtf8HexDigest(scratch, digest); return scratch.bytesRefView(); } + private static final byte[] ASCII_HEX_BYTES = new byte[] { 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 97, 98, 99, 100, 101, 102 }; + + /** + * This function appends hex bytes directly to the {@link BreakingBytesRefBuilder} + * bypassing unnecessary array allocations and byte array copying. + */ + private static void appendUtf8HexDigest(BreakingBytesRefBuilder scratch, byte[] bytes) { + for (byte b : bytes) { + scratch.append(ASCII_HEX_BYTES[b >> 4 & 0xf]); + scratch.append(ASCII_HEX_BYTES[b & 0xf]); + } + } + @Override public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { if (alg.foldable()) { From a22e2ad15c964f94bcf0d92c0cede418fa2684dd Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 6 Dec 2024 13:24:22 +0100 Subject: [PATCH 19/33] fix yaml test --- .../yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index 81f65668722f..04eaec5154bf 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -92,7 +92,7 @@ setup: - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} # Testing for the entire function set isn't feasbile, so we just check that we return the
correct count as an approximation. - - length: {esql.functions: 128} # check the "sister" test below for a likely update to the same esql.functions length check + - length: {esql.functions: 129} # check the "sister" test below for a likely update to the same esql.functions length check --- "Basic ESQL usage output (telemetry) non-snapshot version": From bb8f73bf5858b84e2a629a11febcea773289d027 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 6 Dec 2024 13:25:13 +0100 Subject: [PATCH 20/33] update corresponding test --- .../yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index 04eaec5154bf..2a4cde9a680e 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -163,4 +163,4 @@ setup: - match: {esql.functions.cos: $functions_cos} - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} - - length: {esql.functions: 124} # check the "sister" test above for a likely update to the same esql.functions length check + - length: {esql.functions: 125} # check the "sister" test above for a likely update to the same esql.functions length check From 2506a55bdbc61f9bec4473feb2e141b094ecea53 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 6 Dec 2024 16:09:56 +0100 Subject: [PATCH 21/33] reorder --- .../scalar/{hash => string}/HashConstantEvaluator.java | 2 +- .../function/scalar/{hash => string}/HashEvaluator.java | 2 +- .../esql/expression/function/EsqlFunctionRegistry.java | 6 +++--- .../expression/function/scalar/ScalarFunctionWritables.java | 4 ++-- .../expression/function/scalar/{hash => string}/Hash.java | 2 +- .../function/scalar/{hash => 
string}/HashTests.java | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) rename x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/{hash => string}/HashConstantEvaluator.java (99%) rename x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/{hash => string}/HashEvaluator.java (99%) rename x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/{hash => string}/Hash.java (99%) rename x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/{hash => string}/HashTests.java (99%) diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashConstantEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java similarity index 99% rename from x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashConstantEvaluator.java rename to x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java index 512b5d93e228..f82501da7c97 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashConstantEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License // 2.0; you may not use this file except in compliance with the Elastic License // 2.0. 
-package org.elasticsearch.xpack.esql.expression.function.scalar.hash; +package org.elasticsearch.xpack.esql.expression.function.scalar.string; import java.lang.IllegalArgumentException; import java.lang.Override; diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashEvaluator.java similarity index 99% rename from x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java rename to x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashEvaluator.java index e283b445f39f..3c782dafafe0 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashEvaluator.java @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License // 2.0; you may not use this file except in compliance with the Elastic License // 2.0. 
-package org.elasticsearch.xpack.esql.expression.function.scalar.hash; +package org.elasticsearch.xpack.esql.expression.function.scalar.string; import java.lang.IllegalArgumentException; import java.lang.Override; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index f31ab8ab884e..997bb22889f5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -68,7 +68,6 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateParse; import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc; import org.elasticsearch.xpack.esql.expression.function.scalar.date.Now; -import org.elasticsearch.xpack.esql.expression.function.scalar.hash.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.ip.CIDRMatch; import org.elasticsearch.xpack.esql.expression.function.scalar.ip.IpPrefix; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Abs; @@ -130,6 +129,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length; @@ -327,6 +327,7 @@ private static FunctionDefinition[][] functions() { def(ByteLength.class, ByteLength::new, "byte_length"), def(Concat.class, Concat::new, "concat"), 
def(EndsWith.class, EndsWith::new, "ends_with"), + def(Hash.class, Hash::new, "hash"), def(LTrim.class, LTrim::new, "ltrim"), def(Left.class, Left::new, "left"), def(Length.class, Length::new, "length"), @@ -341,8 +342,7 @@ private static FunctionDefinition[][] functions() { def(Substring.class, Substring::new, "substring"), def(ToLower.class, ToLower::new, "to_lower"), def(ToUpper.class, ToUpper::new, "to_upper"), - def(Trim.class, Trim::new, "trim"), - def(Hash.class, Hash::new, "hash") }, + def(Trim.class, Trim::new, "trim") }, // date new FunctionDefinition[] { def(DateDiff.class, DateDiff::new, "date_diff"), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java index 40c29c7ea388..820af4e7f3d0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java @@ -20,7 +20,6 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateParse; import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc; import org.elasticsearch.xpack.esql.expression.function.scalar.date.Now; -import org.elasticsearch.xpack.esql.expression.function.scalar.hash.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.ip.CIDRMatch; import org.elasticsearch.xpack.esql.expression.function.scalar.ip.IpPrefix; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Atan2; @@ -35,6 +34,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith; +import 
org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Locate; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Repeat; @@ -64,6 +64,7 @@ public static List getNamedWriteables() { entries.add(Concat.ENTRY); entries.add(E.ENTRY); entries.add(EndsWith.ENTRY); + entries.add(Hash.ENTRY); entries.add(Greatest.ENTRY); entries.add(Hypot.ENTRY); entries.add(In.ENTRY); @@ -93,7 +94,6 @@ public static List getNamedWriteables() { entries.add(Tau.ENTRY); entries.add(ToLower.ENTRY); entries.add(ToUpper.ENTRY); - entries.add(Hash.ENTRY); entries.addAll(GroupingWritables.getNamedWriteables()); return entries; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java similarity index 99% rename from x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java index cde1e18457fc..4d22fcf96407 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.esql.expression.function.scalar.hash; +package org.elasticsearch.xpack.esql.expression.function.scalar.string; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java similarity index 99% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java rename to x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java index 59fc12514332..887cec50ed6b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/hash/HashTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.esql.expression.function.scalar.hash; +package org.elasticsearch.xpack.esql.expression.function.scalar.string; import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; From dce7cd0f7b22f2248fea726866239b379cd602ba Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 6 Dec 2024 16:18:36 +0100 Subject: [PATCH 22/33] update docs --- docs/reference/esql/functions/description/hash.asciidoc | 2 +- docs/reference/esql/functions/kibana/definition/hash.json | 2 +- docs/reference/esql/functions/kibana/docs/hash.md | 2 +- docs/reference/esql/functions/string-functions.asciidoc | 2 ++ .../xpack/esql/expression/function/scalar/string/Hash.java | 5 ++++- 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/reference/esql/functions/description/hash.asciidoc b/docs/reference/esql/functions/description/hash.asciidoc index 8521baedc0b3..e074915c5132 100644 --- a/docs/reference/esql/functions/description/hash.asciidoc +++ b/docs/reference/esql/functions/description/hash.asciidoc @@ -2,4 +2,4 @@ *Description* -Computes the hash of the input using java.security.MessageDigest. +Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512. diff --git a/docs/reference/esql/functions/kibana/definition/hash.json b/docs/reference/esql/functions/kibana/definition/hash.json index fd27124cf08e..21f56bad80df 100644 --- a/docs/reference/esql/functions/kibana/definition/hash.json +++ b/docs/reference/esql/functions/kibana/definition/hash.json @@ -2,7 +2,7 @@ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. 
See ../README.md for how to regenerate it.", "type" : "eval", "name" : "hash", - "description" : "Computes the hash of the input using java.security.MessageDigest.", + "description" : "Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512.", "signatures" : [ { "params" : [ diff --git a/docs/reference/esql/functions/kibana/docs/hash.md b/docs/reference/esql/functions/kibana/docs/hash.md index 8086299d5e0a..9826e80ec5be 100644 --- a/docs/reference/esql/functions/kibana/docs/hash.md +++ b/docs/reference/esql/functions/kibana/docs/hash.md @@ -3,5 +3,5 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ --> ### HASH -Computes the hash of the input using java.security.MessageDigest. +Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512. diff --git a/docs/reference/esql/functions/string-functions.asciidoc b/docs/reference/esql/functions/string-functions.asciidoc index ce9636f5c5a3..da9580a55151 100644 --- a/docs/reference/esql/functions/string-functions.asciidoc +++ b/docs/reference/esql/functions/string-functions.asciidoc @@ -13,6 +13,7 @@ * <> * <> * <> +* <> * <> * <> * <> @@ -37,6 +38,7 @@ include::layout/byte_length.asciidoc[] include::layout/concat.asciidoc[] include::layout/ends_with.asciidoc[] include::layout/from_base64.asciidoc[] +include::layout/hash.asciidoc[] include::layout/left.asciidoc[] include::layout/length.asciidoc[] include::layout/locate.asciidoc[] diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java index 4d22fcf96407..38d085e93d5a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java @@ -41,7 +41,10 @@ public class Hash extends EsqlScalarFunction { private final Expression alg; private final Expression input; - @FunctionInfo(returnType = "keyword", description = "Computes the hash of the input using java.security.MessageDigest.") + @FunctionInfo( + returnType = "keyword", + description = "Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512." + ) public Hash( Source source, @Param(name = "alg", type = { "keyword", "text" }, description = "Hash algorithm to use.") Expression alg, From ed4f01f386384d2143e446bcd7154e70c305ddf9 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Wed, 11 Dec 2024 15:28:42 +0100 Subject: [PATCH 23/33] use non lookup index in test --- .../src/main/resources/hash.csv-spec | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec index e911b34eb176..0a3ae1936e7e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec @@ -1,14 +1,15 @@ hash required_capability: hash_function -FROM languages -| EVAL md5 = hash("md5", language_name), sha256 = hash("sha256", language_name) -| KEEP language_name, md5, sha256; +FROM sample_data +| WHERE message != "Connection error" +| EVAL md5 = hash("md5", message), sha256 = hash("sha256", message) +| KEEP message, md5, sha256; ignoreOrder:true -language_name:keyword | md5:keyword | sha256:keyword -English | 78463a384a5aa4fad5fa73e2f506ecfc | ba118bf7fc9c1aedc1edb28a0aa86e0b43b681f222af6616e13c43be87815b06 -French | ad225f707802ba118c22987186dd38e8 | 7458199fe97a184002bfd3c42bae81371f0ae2229b5c0a257a9bae77e4f4eda8 -Spanish | cb5480c32e71778852b08ae1e8712775 | 
3411059cb8e0660e29dd7a3737e65a28b08eb01524a8ebc3d4168932649f23e6 -German | 86bc3115eb4e9873ac96904a4a68e19e | a659b60d246dce8d4865d45673963de83d0e49a57e8559da98581c2bc5d7d97f +message:keyword | md5:keyword | sha256:keyword +Connected to 10.1.0.1 | abd7d1ce2bb636842a29246b3512dcae | 6d8372129ad78770f7185554dd39864749a62690216460752d6c075fa38ad85c +Connected to 10.1.0.2 | 8f8f1cb60832d153f5b9ec6dc828b93f | b0db24720f15857091b3c99f4c4833586d0ea3229911b8777efb8d917cf27e9a +Connected to 10.1.0.3 | 912b6dc13503165a15de43304bb77c78 | 75b0480188db8acc4d5cc666a51227eb2bc5b989cd8ca912609f33e0846eff57 +Disconnected | ef70e46fd3bbc21e3e1f0b6815e750c0 | 04dfac3671b494ad53fcd152f7a14511bfb35747278aad8ce254a0d6e4ba4718 ; From 99ad3577e40ce8f988562386c20ff10cdd8ed51b Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Thu, 12 Dec 2024 09:50:03 +0100 Subject: [PATCH 24/33] rename alg -> algorithm --- .../functions/kibana/definition/hash.json | 8 +-- .../esql/functions/parameters/hash.asciidoc | 2 +- .../esql/functions/signature/hash.svg | 2 +- .../esql/functions/types/hash.asciidoc | 2 +- .../scalar/string/HashConstantEvaluator.java | 25 ++++----- .../function/scalar/string/HashEvaluator.java | 51 ++++++++++--------- .../function/scalar/string/Hash.java | 43 ++++++++-------- .../function/scalar/string/HashTests.java | 34 ++++++------- 8 files changed, 86 insertions(+), 81 deletions(-) diff --git a/docs/reference/esql/functions/kibana/definition/hash.json b/docs/reference/esql/functions/kibana/definition/hash.json index 21f56bad80df..17a60cf45acf 100644 --- a/docs/reference/esql/functions/kibana/definition/hash.json +++ b/docs/reference/esql/functions/kibana/definition/hash.json @@ -7,7 +7,7 @@ { "params" : [ { - "name" : "alg", + "name" : "algorithm", "type" : "keyword", "optional" : false, "description" : "Hash algorithm to use." 
@@ -25,7 +25,7 @@ { "params" : [ { - "name" : "alg", + "name" : "algorithm", "type" : "keyword", "optional" : false, "description" : "Hash algorithm to use." @@ -43,7 +43,7 @@ { "params" : [ { - "name" : "alg", + "name" : "algorithm", "type" : "text", "optional" : false, "description" : "Hash algorithm to use." @@ -61,7 +61,7 @@ { "params" : [ { - "name" : "alg", + "name" : "algorithm", "type" : "text", "optional" : false, "description" : "Hash algorithm to use." diff --git a/docs/reference/esql/functions/parameters/hash.asciidoc b/docs/reference/esql/functions/parameters/hash.asciidoc index cabffe51f7fe..d47a82d4ab21 100644 --- a/docs/reference/esql/functions/parameters/hash.asciidoc +++ b/docs/reference/esql/functions/parameters/hash.asciidoc @@ -2,7 +2,7 @@ *Parameters* -`alg`:: +`algorithm`:: Hash algorithm to use. `input`:: diff --git a/docs/reference/esql/functions/signature/hash.svg b/docs/reference/esql/functions/signature/hash.svg index 2b6b1088ed56..f819e14c9d1a 100644 --- a/docs/reference/esql/functions/signature/hash.svg +++ b/docs/reference/esql/functions/signature/hash.svg @@ -1 +1 @@ -HASH(alg,input) \ No newline at end of file +HASH(algorithm,input) \ No newline at end of file diff --git a/docs/reference/esql/functions/types/hash.asciidoc b/docs/reference/esql/functions/types/hash.asciidoc index 9d8a18108467..786ba03b2aa6 100644 --- a/docs/reference/esql/functions/types/hash.asciidoc +++ b/docs/reference/esql/functions/types/hash.asciidoc @@ -4,7 +4,7 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== -alg | input | result +algorithm | input | result keyword | keyword | keyword keyword | text | keyword text | keyword | keyword diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java index f82501da7c97..d5bfbfca04d8 100644 --- 
a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java @@ -30,7 +30,7 @@ public final class HashConstantEvaluator implements EvalOperator.ExpressionEvalu private final BreakingBytesRefBuilder scratch; - private final MessageDigest alg; + private final MessageDigest algorithm; private final EvalOperator.ExpressionEvaluator input; @@ -38,11 +38,12 @@ public final class HashConstantEvaluator implements EvalOperator.ExpressionEvalu private Warnings warnings; - public HashConstantEvaluator(Source source, BreakingBytesRefBuilder scratch, MessageDigest alg, - EvalOperator.ExpressionEvaluator input, DriverContext driverContext) { + public HashConstantEvaluator(Source source, BreakingBytesRefBuilder scratch, + MessageDigest algorithm, EvalOperator.ExpressionEvaluator input, + DriverContext driverContext) { this.source = source; this.scratch = scratch; - this.alg = alg; + this.algorithm = algorithm; this.input = input; this.driverContext = driverContext; } @@ -73,7 +74,7 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock inputBlock) { result.appendNull(); continue position; } - result.appendBytesRef(Hash.processConstant(this.scratch, this.alg, inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); + result.appendBytesRef(Hash.processConstant(this.scratch, this.algorithm, inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); } return result.build(); } @@ -83,7 +84,7 @@ public BytesRefVector eval(int positionCount, BytesRefVector inputVector) { try(BytesRefVector.Builder result = driverContext.blockFactory().newBytesRefVectorBuilder(positionCount)) { BytesRef inputScratch = new BytesRef(); position: for (int p = 0; p < positionCount; p++) { - result.appendBytesRef(Hash.processConstant(this.scratch, this.alg, 
inputVector.getBytesRef(p, inputScratch))); + result.appendBytesRef(Hash.processConstant(this.scratch, this.algorithm, inputVector.getBytesRef(p, inputScratch))); } return result.build(); } @@ -91,7 +92,7 @@ public BytesRefVector eval(int positionCount, BytesRefVector inputVector) { @Override public String toString() { - return "HashConstantEvaluator[" + "alg=" + alg + ", input=" + input + "]"; + return "HashConstantEvaluator[" + "algorithm=" + algorithm + ", input=" + input + "]"; } @Override @@ -116,27 +117,27 @@ static class Factory implements EvalOperator.ExpressionEvaluator.Factory { private final Function scratch; - private final Function alg; + private final Function algorithm; private final EvalOperator.ExpressionEvaluator.Factory input; public Factory(Source source, Function scratch, - Function alg, + Function algorithm, EvalOperator.ExpressionEvaluator.Factory input) { this.source = source; this.scratch = scratch; - this.alg = alg; + this.algorithm = algorithm; this.input = input; } @Override public HashConstantEvaluator get(DriverContext context) { - return new HashConstantEvaluator(source, scratch.apply(context), alg.apply(context), input.get(context), context); + return new HashConstantEvaluator(source, scratch.apply(context), algorithm.apply(context), input.get(context), context); } @Override public String toString() { - return "HashConstantEvaluator[" + "alg=" + alg + ", input=" + input + "]"; + return "HashConstantEvaluator[" + "algorithm=" + algorithm + ", input=" + input + "]"; } } } diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashEvaluator.java index 3c782dafafe0..8b01cc033014 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashEvaluator.java +++ 
b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashEvaluator.java @@ -30,7 +30,7 @@ public final class HashEvaluator implements EvalOperator.ExpressionEvaluator { private final BreakingBytesRefBuilder scratch; - private final EvalOperator.ExpressionEvaluator alg; + private final EvalOperator.ExpressionEvaluator algorithm; private final EvalOperator.ExpressionEvaluator input; @@ -39,43 +39,44 @@ public final class HashEvaluator implements EvalOperator.ExpressionEvaluator { private Warnings warnings; public HashEvaluator(Source source, BreakingBytesRefBuilder scratch, - EvalOperator.ExpressionEvaluator alg, EvalOperator.ExpressionEvaluator input, + EvalOperator.ExpressionEvaluator algorithm, EvalOperator.ExpressionEvaluator input, DriverContext driverContext) { this.source = source; this.scratch = scratch; - this.alg = alg; + this.algorithm = algorithm; this.input = input; this.driverContext = driverContext; } @Override public Block eval(Page page) { - try (BytesRefBlock algBlock = (BytesRefBlock) alg.eval(page)) { + try (BytesRefBlock algorithmBlock = (BytesRefBlock) algorithm.eval(page)) { try (BytesRefBlock inputBlock = (BytesRefBlock) input.eval(page)) { - BytesRefVector algVector = algBlock.asVector(); - if (algVector == null) { - return eval(page.getPositionCount(), algBlock, inputBlock); + BytesRefVector algorithmVector = algorithmBlock.asVector(); + if (algorithmVector == null) { + return eval(page.getPositionCount(), algorithmBlock, inputBlock); } BytesRefVector inputVector = inputBlock.asVector(); if (inputVector == null) { - return eval(page.getPositionCount(), algBlock, inputBlock); + return eval(page.getPositionCount(), algorithmBlock, inputBlock); } - return eval(page.getPositionCount(), algVector, inputVector); + return eval(page.getPositionCount(), algorithmVector, inputVector); } } } - public BytesRefBlock eval(int positionCount, BytesRefBlock algBlock, BytesRefBlock inputBlock) { + public 
BytesRefBlock eval(int positionCount, BytesRefBlock algorithmBlock, + BytesRefBlock inputBlock) { try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { - BytesRef algScratch = new BytesRef(); + BytesRef algorithmScratch = new BytesRef(); BytesRef inputScratch = new BytesRef(); position: for (int p = 0; p < positionCount; p++) { - if (algBlock.isNull(p)) { + if (algorithmBlock.isNull(p)) { result.appendNull(); continue position; } - if (algBlock.getValueCount(p) != 1) { - if (algBlock.getValueCount(p) > 1) { + if (algorithmBlock.getValueCount(p) != 1) { + if (algorithmBlock.getValueCount(p) > 1) { warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); } result.appendNull(); @@ -93,7 +94,7 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock algBlock, BytesRefBlo continue position; } try { - result.appendBytesRef(Hash.process(this.scratch, algBlock.getBytesRef(algBlock.getFirstValueIndex(p), algScratch), inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); + result.appendBytesRef(Hash.process(this.scratch, algorithmBlock.getBytesRef(algorithmBlock.getFirstValueIndex(p), algorithmScratch), inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); } catch (NoSuchAlgorithmException e) { warnings().registerException(e); result.appendNull(); @@ -103,14 +104,14 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock algBlock, BytesRefBlo } } - public BytesRefBlock eval(int positionCount, BytesRefVector algVector, + public BytesRefBlock eval(int positionCount, BytesRefVector algorithmVector, BytesRefVector inputVector) { try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { - BytesRef algScratch = new BytesRef(); + BytesRef algorithmScratch = new BytesRef(); BytesRef inputScratch = new BytesRef(); position: for (int p = 0; p < positionCount; p++) { try { - 
result.appendBytesRef(Hash.process(this.scratch, algVector.getBytesRef(p, algScratch), inputVector.getBytesRef(p, inputScratch))); + result.appendBytesRef(Hash.process(this.scratch, algorithmVector.getBytesRef(p, algorithmScratch), inputVector.getBytesRef(p, inputScratch))); } catch (NoSuchAlgorithmException e) { warnings().registerException(e); result.appendNull(); @@ -122,12 +123,12 @@ public BytesRefBlock eval(int positionCount, BytesRefVector algVector, @Override public String toString() { - return "HashEvaluator[" + "alg=" + alg + ", input=" + input + "]"; + return "HashEvaluator[" + "algorithm=" + algorithm + ", input=" + input + "]"; } @Override public void close() { - Releasables.closeExpectNoException(scratch, alg, input); + Releasables.closeExpectNoException(scratch, algorithm, input); } private Warnings warnings() { @@ -147,27 +148,27 @@ static class Factory implements EvalOperator.ExpressionEvaluator.Factory { private final Function scratch; - private final EvalOperator.ExpressionEvaluator.Factory alg; + private final EvalOperator.ExpressionEvaluator.Factory algorithm; private final EvalOperator.ExpressionEvaluator.Factory input; public Factory(Source source, Function scratch, - EvalOperator.ExpressionEvaluator.Factory alg, + EvalOperator.ExpressionEvaluator.Factory algorithm, EvalOperator.ExpressionEvaluator.Factory input) { this.source = source; this.scratch = scratch; - this.alg = alg; + this.algorithm = algorithm; this.input = input; } @Override public HashEvaluator get(DriverContext context) { - return new HashEvaluator(source, scratch.apply(context), alg.get(context), input.get(context), context); + return new HashEvaluator(source, scratch.apply(context), algorithm.get(context), input.get(context), context); } @Override public String toString() { - return "HashEvaluator[" + "alg=" + alg + ", input=" + input + "]"; + return "HashEvaluator[" + "algorithm=" + algorithm + ", input=" + input + "]"; } } } diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java index 38d085e93d5a..b5364935464b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java @@ -38,7 +38,7 @@ public class Hash extends EsqlScalarFunction { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Hash", Hash::new); - private final Expression alg; + private final Expression algorithm; private final Expression input; @FunctionInfo( @@ -47,11 +47,11 @@ public class Hash extends EsqlScalarFunction { ) public Hash( Source source, - @Param(name = "alg", type = { "keyword", "text" }, description = "Hash algorithm to use.") Expression alg, + @Param(name = "algorithm", type = { "keyword", "text" }, description = "Hash algorithm to use.") Expression algorithm, @Param(name = "input", type = { "keyword", "text" }, description = "Input to hash.") Expression input ) { - super(source, List.of(alg, input)); - this.alg = alg; + super(source, List.of(algorithm, input)); + this.algorithm = algorithm; this.input = input; } @@ -62,7 +62,7 @@ private Hash(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { source().writeTo(out); - out.writeNamedWriteable(alg); + out.writeNamedWriteable(algorithm); out.writeNamedWriteable(input); } @@ -82,7 +82,7 @@ protected TypeResolution resolveType() { return new TypeResolution("Unresolved children"); } - TypeResolution resolution = isString(alg, sourceText(), FIRST); + TypeResolution resolution = isString(algorithm, sourceText(), FIRST); if (resolution.unresolved()) { return resolution; } @@ -92,28 +92,31 @@ protected TypeResolution resolveType() 
{ @Override public boolean foldable() { - return alg.foldable() && input.foldable(); + return algorithm.foldable() && input.foldable(); } @Evaluator(warnExceptions = NoSuchAlgorithmException.class) - static BytesRef process(@Fixed(includeInToString = false, build = true) BreakingBytesRefBuilder scratch, BytesRef alg, BytesRef input) - throws NoSuchAlgorithmException { - return hash(scratch, MessageDigest.getInstance(alg.utf8ToString()), input); + static BytesRef process( + @Fixed(includeInToString = false, build = true) BreakingBytesRefBuilder scratch, + BytesRef algorithm, + BytesRef input + ) throws NoSuchAlgorithmException { + return hash(scratch, MessageDigest.getInstance(algorithm.utf8ToString()), input); } @Evaluator(extraName = "Constant") static BytesRef processConstant( @Fixed(includeInToString = false, build = true) BreakingBytesRefBuilder scratch, - @Fixed(build = true) MessageDigest alg, + @Fixed(build = true) MessageDigest algorithm, BytesRef input ) { - return hash(scratch, alg, input); + return hash(scratch, algorithm, input); } - private static BytesRef hash(BreakingBytesRefBuilder scratch, MessageDigest alg, BytesRef input) { - alg.reset(); - alg.update(input.bytes, input.offset, input.length); - var digest = alg.digest(); + private static BytesRef hash(BreakingBytesRefBuilder scratch, MessageDigest algorithm, BytesRef input) { + algorithm.reset(); + algorithm.update(input.bytes, input.offset, input.length); + var digest = algorithm.digest(); scratch.clear(); scratch.grow(digest.length * 2); appendUtf8HexDigest(scratch, digest); @@ -135,9 +138,9 @@ private static void appendUtf8HexDigest(BreakingBytesRefBuilder scratch, byte[] @Override public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { - if (alg.foldable()) { + if (algorithm.foldable()) { try { - var md = MessageDigest.getInstance(((BytesRef) alg.fold()).utf8ToString()); + var md = MessageDigest.getInstance(((BytesRef) algorithm.fold()).utf8ToString()); return 
new HashConstantEvaluator.Factory( source(), context -> new BreakingBytesRefBuilder(context.breaker(), "hash"), @@ -145,13 +148,13 @@ public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvalua toEvaluator.apply(input) ); } catch (NoSuchAlgorithmException e) { - throw new InvalidArgumentException(e, "invalid alg for [{}]: {}", sourceText(), e.getMessage()); + throw new InvalidArgumentException(e, "invalid algorithm for [{}]: {}", sourceText(), e.getMessage()); } } else { return new HashEvaluator.Factory( source(), context -> new BreakingBytesRefBuilder(context.breaker(), "hash"), - toEvaluator.apply(alg), + toEvaluator.apply(algorithm), toEvaluator.apply(input) ); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java index 887cec50ed6b..ba6b44922ded 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java @@ -42,46 +42,46 @@ public HashTests(@Name("TestCase") Supplier testCaseS @ParametersFactory public static Iterable parameters() { List cases = new ArrayList<>(); - for (String alg : List.of("MD5", "SHA", "SHA-224", "SHA-256", "SHA-384", "SHA-512")) { - cases.addAll(createTestCases(alg)); + for (String algorithm : List.of("MD5", "SHA", "SHA-224", "SHA-256", "SHA-384", "SHA-512")) { + cases.addAll(createTestCases(algorithm)); } - cases.add(new TestCaseSupplier("Invalid alg", List.of(DataType.KEYWORD, DataType.KEYWORD), () -> { + cases.add(new TestCaseSupplier("Invalid algorithm", List.of(DataType.KEYWORD, DataType.KEYWORD), () -> { var input = randomAlphaOfLength(10); return new TestCaseSupplier.TestCase( List.of( - new TestCaseSupplier.TypedData(new BytesRef("invalid"), 
DataType.KEYWORD, "alg"), + new TestCaseSupplier.TypedData(new BytesRef("invalid"), DataType.KEYWORD, "algorithm"), new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "input") ), - "HashEvaluator[alg=Attribute[channel=0], input=Attribute[channel=1]]", + "HashEvaluator[algorithm=Attribute[channel=0], input=Attribute[channel=1]]", DataType.KEYWORD, is(nullValue()) ).withWarning("Line -1:-1: evaluation of [] failed, treating result as null. Only first 20 failures recorded.") .withWarning("Line -1:-1: java.security.NoSuchAlgorithmException: invalid MessageDigest not available") - .withFoldingException(InvalidArgumentException.class, "invalid alg for []: invalid MessageDigest not available"); + .withFoldingException(InvalidArgumentException.class, "invalid algorithm for []: invalid MessageDigest not available"); })); return parameterSuppliersFromTypedDataWithDefaultChecks(true, cases, (v, p) -> "string"); } - private static List createTestCases(String alg) { + private static List createTestCases(String algorithm) { return List.of( - createTestCase(alg, DataType.KEYWORD, DataType.KEYWORD), - createTestCase(alg, DataType.KEYWORD, DataType.TEXT), - createTestCase(alg, DataType.TEXT, DataType.KEYWORD), - createTestCase(alg, DataType.TEXT, DataType.TEXT) + createTestCase(algorithm, DataType.KEYWORD, DataType.KEYWORD), + createTestCase(algorithm, DataType.KEYWORD, DataType.TEXT), + createTestCase(algorithm, DataType.TEXT, DataType.KEYWORD), + createTestCase(algorithm, DataType.TEXT, DataType.TEXT) ); } - private static TestCaseSupplier createTestCase(String alg, DataType algType, DataType inputType) { - return new TestCaseSupplier(alg, List.of(algType, inputType), () -> { + private static TestCaseSupplier createTestCase(String algorithm, DataType algorithmType, DataType inputType) { + return new TestCaseSupplier(algorithm, List.of(algorithmType, inputType), () -> { var input = randomAlphaOfLength(10); return new TestCaseSupplier.TestCase( List.of( - new 
TestCaseSupplier.TypedData(new BytesRef(alg), algType, "alg"), + new TestCaseSupplier.TypedData(new BytesRef(algorithm), algorithmType, "algorithm"), new TestCaseSupplier.TypedData(new BytesRef(input), inputType, "input") ), - "HashEvaluator[alg=Attribute[channel=0], input=Attribute[channel=1]]", + "HashEvaluator[algorithm=Attribute[channel=0], input=Attribute[channel=1]]", DataType.KEYWORD, - equalTo(new BytesRef(hash(alg, input))) + equalTo(new BytesRef(hash(algorithm, input))) ); }); } @@ -108,6 +108,6 @@ public void testInvalidAlgLiteral() { new Hash(source, new Literal(source, new BytesRef("invalid"), DataType.KEYWORD), field("str", DataType.KEYWORD)) ).get(driverContext) ); - assertThat(e.getMessage(), startsWith("invalid alg for [hast(\"invalid\", input)]: invalid MessageDigest not available")); + assertThat(e.getMessage(), startsWith("invalid algorithm for [hast(\"invalid\", input)]: invalid MessageDigest not available")); } } From 3abf208f22bafaf5c903f67ae30d942b1262c262 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Thu, 12 Dec 2024 10:54:59 +0100 Subject: [PATCH 25/33] more hash test cases --- .../src/main/resources/hash.csv-spec | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec index 0a3ae1936e7e..50fe5ccff504 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec @@ -13,3 +13,40 @@ Connected to 10.1.0.2 | 8f8f1cb60832d153f5b9ec6dc828b93f | b0db24720f15857091b3c Connected to 10.1.0.3 | 912b6dc13503165a15de43304bb77c78 | 75b0480188db8acc4d5cc666a51227eb2bc5b989cd8ca912609f33e0846eff57 Disconnected | ef70e46fd3bbc21e3e1f0b6815e750c0 | 04dfac3671b494ad53fcd152f7a14511bfb35747278aad8ce254a0d6e4ba4718 ; + + +hashOfConvertedType +required_capability: hash_function + +FROM sample_data +| WHERE 
message != "Connection error" +| EVAL input = event_duration::STRING, md5 = hash("md5", input), sha256 = hash("sha256", input) +| KEEP message, input, md5, sha256; +ignoreOrder:true + +message:keyword | input:keyword | md5:keyword | sha256:keyword +Connected to 10.1.0.1 | 1756467 | c4fc1c57ee9b1d2b2023b70c8c167b54 | 8376a50a7ba7e6bd1bf9ad0c32d27d2f49fd0fa422573f98f239e21048b078f3 +Connected to 10.1.0.2 | 2764889 | 8e8cf005e11a7b5df1d9478a4715a444 | 1031f2bef8eaecbf47319505422300b27ea1f7c38b6717d41332325062f9a56a +Connected to 10.1.0.3 | 3450233 | 09f2c64f5a55e9edf8ffbad336b561d8 | f77d7545769c4ecc85092f4f0b7ec8c20f467e4beb15fe67ca29f9aa8e9a6900 +Disconnected | 1232382 | 6beac1485638d51e13c2c53990a2f611 | 9a03c1274a3ebb6c1cb85d170ce0a6fdb9d2232724e06b9f5e7cb9274af3cad6 +; + + +hashOfNullInput +required_capability: hash_function + +ROW input=null::STRING | EVAL md5 = hash("md5", input), sha256 = hash("sha256", input); + +input:keyword | md5:keyword | sha256:keyword +null | null | null +; + + +hashWithNullAlgorithm +required_capability: hash_function + +ROW input="input" | EVAL hash = hash(null, input); + +input:keyword | hash:keyword +input | null +; From 098378628c3e092d6638fb9ee51ab8ee24a3313d Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 13 Dec 2024 12:58:11 +0100 Subject: [PATCH 26/33] cover folded literal case --- .../scalar/string/HashConstantEvaluator.java | 9 ++-- .../function/scalar/string/Hash.java | 44 +++++++++++++++++-- .../function/scalar/string/HashTests.java | 31 ++++++++++--- 3 files changed, 69 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java index d5bfbfca04d8..34cff7301863 100644 --- 
a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java @@ -7,7 +7,6 @@ import java.lang.IllegalArgumentException; import java.lang.Override; import java.lang.String; -import java.security.MessageDigest; import java.util.function.Function; import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.data.Block; @@ -30,7 +29,7 @@ public final class HashConstantEvaluator implements EvalOperator.ExpressionEvalu private final BreakingBytesRefBuilder scratch; - private final MessageDigest algorithm; + private final Hash.HashFunction algorithm; private final EvalOperator.ExpressionEvaluator input; @@ -39,7 +38,7 @@ public final class HashConstantEvaluator implements EvalOperator.ExpressionEvalu private Warnings warnings; public HashConstantEvaluator(Source source, BreakingBytesRefBuilder scratch, - MessageDigest algorithm, EvalOperator.ExpressionEvaluator input, + Hash.HashFunction algorithm, EvalOperator.ExpressionEvaluator input, DriverContext driverContext) { this.source = source; this.scratch = scratch; @@ -117,12 +116,12 @@ static class Factory implements EvalOperator.ExpressionEvaluator.Factory { private final Function scratch; - private final Function algorithm; + private final Function algorithm; private final EvalOperator.ExpressionEvaluator.Factory input; public Factory(Source source, Function scratch, - Function algorithm, + Function algorithm, EvalOperator.ExpressionEvaluator.Factory input) { this.source = source; this.scratch = scratch; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java index b5364935464b..20b77f1f7143 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java @@ -14,6 +14,7 @@ import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -29,6 +30,7 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.List; +import java.util.function.Function; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; @@ -107,10 +109,10 @@ static BytesRef process( @Evaluator(extraName = "Constant") static BytesRef processConstant( @Fixed(includeInToString = false, build = true) BreakingBytesRefBuilder scratch, - @Fixed(build = true) MessageDigest algorithm, + @Fixed(build = true) HashFunction algorithm, BytesRef input ) { - return hash(scratch, algorithm, input); + return hash(scratch, algorithm.digest, input); } private static BytesRef hash(BreakingBytesRefBuilder scratch, MessageDigest algorithm, BytesRef input) { @@ -140,11 +142,22 @@ private static void appendUtf8HexDigest(BreakingBytesRefBuilder scratch, byte[] public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { if (algorithm.foldable()) { try { - var md = MessageDigest.getInstance(((BytesRef) algorithm.fold()).utf8ToString()); + // hash function is created here in order to validate the algorithm is valid before evaluator is created + var hf = HashFunction.create((BytesRef) algorithm.fold()); return new 
HashConstantEvaluator.Factory( source(), context -> new BreakingBytesRefBuilder(context.breaker(), "hash"), - context -> md, + new Function<>() { + @Override + public HashFunction apply(DriverContext context) { + return hf.copy(); + } + + @Override + public String toString() { + return hf.toString(); + } + }, toEvaluator.apply(input) ); } catch (NoSuchAlgorithmException e) { @@ -169,4 +182,27 @@ public Expression replaceChildren(List newChildren) { protected NodeInfo info() { return NodeInfo.create(this, Hash::new, children().get(0), children().get(1)); } + + public record HashFunction(String algorithm, MessageDigest digest) { + + public static HashFunction create(BytesRef literal) throws NoSuchAlgorithmException { + var algorithm = literal.utf8ToString(); + var digest = MessageDigest.getInstance(algorithm); + return new HashFunction(algorithm, digest); + } + + public HashFunction copy() { + try { + return new HashFunction(algorithm, MessageDigest.getInstance(algorithm)); + } catch (NoSuchAlgorithmException e) { + assert false : "Algorithm should be valid at this point"; + throw new IllegalStateException(e); + } + } + + @Override + public String toString() { + return algorithm; + } + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java index ba6b44922ded..ed793fd15ec8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java @@ -67,7 +67,11 @@ private static List createTestCases(String algorithm) { createTestCase(algorithm, DataType.KEYWORD, DataType.KEYWORD), createTestCase(algorithm, DataType.KEYWORD, DataType.TEXT), createTestCase(algorithm, DataType.TEXT, DataType.KEYWORD), - 
createTestCase(algorithm, DataType.TEXT, DataType.TEXT) + createTestCase(algorithm, DataType.TEXT, DataType.TEXT), + createLiteralTestCase(algorithm, DataType.KEYWORD, DataType.KEYWORD), + createLiteralTestCase(algorithm, DataType.KEYWORD, DataType.TEXT), + createLiteralTestCase(algorithm, DataType.TEXT, DataType.KEYWORD), + createLiteralTestCase(algorithm, DataType.TEXT, DataType.KEYWORD) ); } @@ -86,11 +90,26 @@ private static TestCaseSupplier createTestCase(String algorithm, DataType algori }); } - private static String hash(String alg, String input) { + private static TestCaseSupplier createLiteralTestCase(String algorithm, DataType algorithmType, DataType inputType) { + return new TestCaseSupplier(algorithm, List.of(algorithmType, inputType), () -> { + var input = randomAlphaOfLength(10); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(new BytesRef(algorithm), algorithmType, "algorithm").forceLiteral(), + new TestCaseSupplier.TypedData(new BytesRef(input), inputType, "input") + ), + "HashEvaluator[algorithm=" + algorithm + ", input=Attribute[channel=1]]", + DataType.KEYWORD, + equalTo(new BytesRef(hash(algorithm, input))) + ); + }); + } + + private static String hash(String algorithm, String input) { try { - return HexFormat.of().formatHex(MessageDigest.getInstance(alg).digest(input.getBytes(StandardCharsets.UTF_8))); + return HexFormat.of().formatHex(MessageDigest.getInstance(algorithm).digest(input.getBytes(StandardCharsets.UTF_8))); } catch (NoSuchAlgorithmException e) { - throw new IllegalArgumentException("Unknown algorithm: " + alg); + throw new IllegalArgumentException("Unknown algorithm: " + algorithm); } } @@ -99,13 +118,13 @@ protected Expression build(Source source, List args) { return new Hash(source, args.get(0), args.get(1)); } - public void testInvalidAlgLiteral() { + public void testInvalidAlgorithmLiteral() { Source source = new Source(0, 0, "hast(\"invalid\", input)"); DriverContext driverContext = 
driverContext(); InvalidArgumentException e = expectThrows( InvalidArgumentException.class, () -> evaluator( - new Hash(source, new Literal(source, new BytesRef("invalid"), DataType.KEYWORD), field("str", DataType.KEYWORD)) + new Hash(source, new Literal(source, new BytesRef("invalid"), DataType.KEYWORD), field("input", DataType.KEYWORD)) ).get(driverContext) ); assertThat(e.getMessage(), startsWith("invalid algorithm for [hast(\"invalid\", input)]: invalid MessageDigest not available")); From 08eaee6c2c16c41a4e1b7d33a3380dfa4dae86c9 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 13 Dec 2024 13:48:23 +0100 Subject: [PATCH 27/33] fix test --- .../xpack/esql/expression/function/scalar/string/HashTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java index ed793fd15ec8..c1f1f3fb9686 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java @@ -98,7 +98,7 @@ private static TestCaseSupplier createLiteralTestCase(String algorithm, DataType new TestCaseSupplier.TypedData(new BytesRef(algorithm), algorithmType, "algorithm").forceLiteral(), new TestCaseSupplier.TypedData(new BytesRef(input), inputType, "input") ), - "HashEvaluator[algorithm=" + algorithm + ", input=Attribute[channel=1]]", + "HashConstantEvaluator[algorithm=" + algorithm + ", input=Attribute[channel=0]]", DataType.KEYWORD, equalTo(new BytesRef(hash(algorithm, input))) ); From bfab16b35aac367ebbd4eec45b9b6955fb2d60b5 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 13 Dec 2024 14:34:36 +0100 Subject: [PATCH 28/33] additional cases --- 
.../src/main/resources/hash.csv-spec | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec index 50fe5ccff504..fcac1e1859c6 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec @@ -32,6 +32,15 @@ Disconnected | 1232382 | 6beac1485638d51e13c2c53990a2f611 | 9a03c ; +hashOfEmptyInput +required_capability: hash_function + +ROW input="" | EVAL md5 = hash("md5", input), sha256 = hash("sha256", input); + +input:keyword | md5:keyword | sha256:keyword + | d41d8cd98f00b204e9800998ecf8427e | e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 +; + hashOfNullInput required_capability: hash_function @@ -50,3 +59,47 @@ ROW input="input" | EVAL hash = hash(null, input); input:keyword | hash:keyword input | null ; + + +hashWithMv +required_capability: hash_function + +ROW input=["foo", "bar"] | mv_expand input | EVAL md5 = hash("md5", input), sha256 = hash("sha256", input); + +input:keyword | md5:keyword | sha256:keyword +foo | acbd18db4cc2f85cedef654fccc4a4d8 | 2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae +bar | 37b51d194a7513e45b56f6524f2d51f2 | fcde2b2edba56bf408601fb721fe9b5c338d10ee429ea04fae5511b68fbf8fb9 +; + + +hashWithNestedFunctions +required_capability: hash_function + +ROW input=["foo", "bar"] | EVAL hash = concat(hash("md5", mv_concat(input, "-")), "-", hash("sha256", mv_concat(input, "-"))); + +input:keyword | hash:keyword +["foo", "bar"] | e5f9ec048d1dbe19c70f720e002f9cb1-7d89c4f517e3bd4b5e8e76687937005b602ea00c5cba3e25ef1fc6575a55103e +; + + +hashWithConvertedTypes +required_capability: hash_function + +ROW input=42 | EVAL md5 = hash("md5", input::STRING), sha256 = hash("sha256", to_string(input)); + +input:integer | md5:keyword | sha256:keyword +42 | 
a1d0c6e83f027327d8461063f4ac58a6 | 73475cb40a568e8da8a045ced110137e159f890ac4da883b6b17dc651b3a8049 +; + + +hashWithStats +required_capability: hash_function + +FROM sample_data +| EVAL md5="md5" +| STATS count = count(*) by hash(md5, message) +| WHERE count > 1; + +count:long | hash(md5, message):keyword +3 | 2e92ae79ff32b37fee4368a594792183 +; From be91871429976188e2f59b714b22d01e554f90c1 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Fri, 13 Dec 2024 15:56:46 +0100 Subject: [PATCH 29/33] update function count --- .../resources/rest-api-spec/test/esql/60_usage.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index 2a4cde9a680e..b6d75048591e 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -92,7 +92,7 @@ setup: - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} # Testing for the entire function set isn't feasbile, so we just check that we return the correct count as an approximation. 
- - length: {esql.functions: 129} # check the "sister" test below for a likely update to the same esql.functions length check + - length: {esql.functions: 130} # check the "sister" test below for a likely update to the same esql.functions length check --- "Basic ESQL usage output (telemetry) non-snapshot version": @@ -163,4 +163,4 @@ setup: - match: {esql.functions.cos: $functions_cos} - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} - - length: {esql.functions: 125} # check the "sister" test above for a likely update to the same esql.functions length check + - length: {esql.functions: 126} # check the "sister" test above for a likely update to the same esql.functions length check From fb86d18a21b961485fb2565f8e7c74eb659f8a30 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Mon, 16 Dec 2024 08:33:12 +0100 Subject: [PATCH 30/33] fix order --- .../expression/function/scalar/ScalarFunctionWritables.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java index 820af4e7f3d0..3cf0eef9074a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java @@ -64,8 +64,8 @@ public static List getNamedWriteables() { entries.add(Concat.ENTRY); entries.add(E.ENTRY); entries.add(EndsWith.ENTRY); - entries.add(Hash.ENTRY); entries.add(Greatest.ENTRY); + entries.add(Hash.ENTRY); entries.add(Hypot.ENTRY); entries.add(In.ENTRY); entries.add(InsensitiveEquals.ENTRY); From edcf755774ac231915c5e737b928559bdd443876 Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Mon, 16 Dec 
2024 08:45:46 +0100 Subject: [PATCH 31/33] randomize input --- .../function/scalar/string/HashTests.java | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java index c1f1f3fb9686..a78d34d1cab3 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java @@ -11,6 +11,7 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -77,30 +78,24 @@ private static List createTestCases(String algorithm) { private static TestCaseSupplier createTestCase(String algorithm, DataType algorithmType, DataType inputType) { return new TestCaseSupplier(algorithm, List.of(algorithmType, inputType), () -> { - var input = randomAlphaOfLength(10); + var input = randomFrom(TestCaseSupplier.stringCases(inputType)).get(); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(new BytesRef(algorithm), algorithmType, "algorithm"), - new TestCaseSupplier.TypedData(new BytesRef(input), inputType, "input") - ), + List.of(new TestCaseSupplier.TypedData(new BytesRef(algorithm), algorithmType, "algorithm"), input), "HashEvaluator[algorithm=Attribute[channel=0], input=Attribute[channel=1]]", DataType.KEYWORD, - equalTo(new BytesRef(hash(algorithm, input))) + equalTo(new BytesRef(hash(algorithm, BytesRefs.toString(input.data())))) ); }); } private 
static TestCaseSupplier createLiteralTestCase(String algorithm, DataType algorithmType, DataType inputType) { return new TestCaseSupplier(algorithm, List.of(algorithmType, inputType), () -> { - var input = randomAlphaOfLength(10); + var input = randomFrom(TestCaseSupplier.stringCases(inputType)).get(); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(new BytesRef(algorithm), algorithmType, "algorithm").forceLiteral(), - new TestCaseSupplier.TypedData(new BytesRef(input), inputType, "input") - ), + List.of(new TestCaseSupplier.TypedData(new BytesRef(algorithm), algorithmType, "algorithm").forceLiteral(), input), "HashConstantEvaluator[algorithm=" + algorithm + ", input=Attribute[channel=0]]", DataType.KEYWORD, - equalTo(new BytesRef(hash(algorithm, input))) + equalTo(new BytesRef(hash(algorithm, BytesRefs.toString(input.data())))) ); }); } From 2c8f5416ea1fa6c55b608e9813545ded5150388e Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Mon, 16 Dec 2024 08:59:52 +0100 Subject: [PATCH 32/33] merge test scenario creation --- .../function/scalar/string/HashTests.java | 37 ++++++++----------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java index a78d34d1cab3..f2c3fba95219 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java @@ -65,39 +65,34 @@ public static Iterable parameters() { private static List createTestCases(String algorithm) { return List.of( - createTestCase(algorithm, DataType.KEYWORD, DataType.KEYWORD), - createTestCase(algorithm, DataType.KEYWORD, DataType.TEXT), - createTestCase(algorithm, 
DataType.TEXT, DataType.KEYWORD), - createTestCase(algorithm, DataType.TEXT, DataType.TEXT), - createLiteralTestCase(algorithm, DataType.KEYWORD, DataType.KEYWORD), - createLiteralTestCase(algorithm, DataType.KEYWORD, DataType.TEXT), - createLiteralTestCase(algorithm, DataType.TEXT, DataType.KEYWORD), - createLiteralTestCase(algorithm, DataType.TEXT, DataType.KEYWORD) + createTestCase(algorithm, false, DataType.KEYWORD, DataType.KEYWORD), + createTestCase(algorithm, false, DataType.KEYWORD, DataType.TEXT), + createTestCase(algorithm, false, DataType.TEXT, DataType.KEYWORD), + createTestCase(algorithm, false, DataType.TEXT, DataType.TEXT), + createTestCase(algorithm, true, DataType.KEYWORD, DataType.KEYWORD), + createTestCase(algorithm, true, DataType.KEYWORD, DataType.TEXT), + createTestCase(algorithm, true, DataType.TEXT, DataType.KEYWORD), + createTestCase(algorithm, true, DataType.TEXT, DataType.TEXT) ); } - private static TestCaseSupplier createTestCase(String algorithm, DataType algorithmType, DataType inputType) { + private static TestCaseSupplier createTestCase(String algorithm, boolean forceLiteral, DataType algorithmType, DataType inputType) { return new TestCaseSupplier(algorithm, List.of(algorithmType, inputType), () -> { var input = randomFrom(TestCaseSupplier.stringCases(inputType)).get(); return new TestCaseSupplier.TestCase( - List.of(new TestCaseSupplier.TypedData(new BytesRef(algorithm), algorithmType, "algorithm"), input), - "HashEvaluator[algorithm=Attribute[channel=0], input=Attribute[channel=1]]", + List.of(createTypedData(algorithm, forceLiteral, algorithmType, "algorithm"), input), + forceLiteral + ? 
"HashConstantEvaluator[algorithm=" + algorithm + ", input=Attribute[channel=0]]" + : "HashEvaluator[algorithm=Attribute[channel=0], input=Attribute[channel=1]]", DataType.KEYWORD, equalTo(new BytesRef(hash(algorithm, BytesRefs.toString(input.data())))) ); }); } - private static TestCaseSupplier createLiteralTestCase(String algorithm, DataType algorithmType, DataType inputType) { - return new TestCaseSupplier(algorithm, List.of(algorithmType, inputType), () -> { - var input = randomFrom(TestCaseSupplier.stringCases(inputType)).get(); - return new TestCaseSupplier.TestCase( - List.of(new TestCaseSupplier.TypedData(new BytesRef(algorithm), algorithmType, "algorithm").forceLiteral(), input), - "HashConstantEvaluator[algorithm=" + algorithm + ", input=Attribute[channel=0]]", - DataType.KEYWORD, - equalTo(new BytesRef(hash(algorithm, BytesRefs.toString(input.data())))) - ); - }); + private static TestCaseSupplier.TypedData createTypedData(String value, boolean forceLiteral, DataType type, String name) { + var data = new TestCaseSupplier.TypedData(new BytesRef(value), type, name); + return forceLiteral ? 
data.forceLiteral() : data; } private static String hash(String algorithm, String input) { From c72e94772277bc7a96490b8f2225a8b832654eaf Mon Sep 17 00:00:00 2001 From: "ievgen.degtiarenko" Date: Wed, 18 Dec 2024 08:51:16 +0100 Subject: [PATCH 33/33] cleanup tests --- .../function/scalar/string/Hash.java | 15 ++++- .../AbstractExpressionSerializationTests.java | 5 ++ .../scalar/string/HashSerializationTests.java | 27 ++++++++ .../scalar/string/HashStaticTests.java | 66 +++++++++++++++++++ .../function/scalar/string/HashTests.java | 15 ----- 5 files changed, 110 insertions(+), 18 deletions(-) create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashSerializationTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashStaticTests.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java index 20b77f1f7143..99c5908699ec 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java @@ -32,6 +32,7 @@ import java.util.List; import java.util.function.Function; +import static org.elasticsearch.compute.ann.Fixed.Scope.THREAD_LOCAL; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; @@ -99,7 +100,7 @@ public boolean foldable() { @Evaluator(warnExceptions = NoSuchAlgorithmException.class) static BytesRef process( - @Fixed(includeInToString = false, build = true) BreakingBytesRefBuilder 
scratch, + @Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch, BytesRef algorithm, BytesRef input ) throws NoSuchAlgorithmException { @@ -108,8 +109,8 @@ static BytesRef process( @Evaluator(extraName = "Constant") static BytesRef processConstant( - @Fixed(includeInToString = false, build = true) BreakingBytesRefBuilder scratch, - @Fixed(build = true) HashFunction algorithm, + @Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch, + @Fixed(scope = THREAD_LOCAL) HashFunction algorithm, BytesRef input ) { return hash(scratch, algorithm.digest, input); @@ -205,4 +206,12 @@ public String toString() { return algorithm; } } + + Expression algorithm() { + return algorithm; + } + + Expression input() { + return input; + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/AbstractExpressionSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/AbstractExpressionSerializationTests.java index 6dd0c5fe88af..050293e58c19 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/AbstractExpressionSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/AbstractExpressionSerializationTests.java @@ -14,10 +14,15 @@ import org.elasticsearch.xpack.esql.plan.AbstractNodeSerializationTests; public abstract class AbstractExpressionSerializationTests extends AbstractNodeSerializationTests { + public static Expression randomChild() { return ReferenceAttributeTests.randomReferenceAttribute(false); } + public static Expression mutateExpression(Expression expression) { + return randomValueOtherThan(expression, AbstractExpressionSerializationTests::randomChild); + } + @Override protected final NamedWriteableRegistry getNamedWriteableRegistry() { return new NamedWriteableRegistry(ExpressionWritables.getNamedWriteables()); diff --git 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashSerializationTests.java new file mode 100644 index 000000000000..f21105c2c8bc --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashSerializationTests.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.elasticsearch.xpack.esql.expression.AbstractExpressionSerializationTests; + +import java.io.IOException; + +public class HashSerializationTests extends AbstractExpressionSerializationTests { + + @Override + protected Hash createTestInstance() { + return new Hash(randomSource(), randomChild(), randomChild()); + } + + @Override + protected Hash mutateInstance(Hash instance) throws IOException { + return randomBoolean() + ? new Hash(instance.source(), mutateExpression(instance.algorithm()), instance.input()) + : new Hash(instance.source(), instance.algorithm(), mutateExpression(instance.input())); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashStaticTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashStaticTests.java new file mode 100644 index 000000000000..871bec7c0680 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashStaticTests.java @@ -0,0 +1,66 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.MockBigArrays; +import org.elasticsearch.common.util.PageCacheRecycler; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.InvalidArgumentException; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.junit.After; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase.evaluator; +import static org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase.field; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.startsWith; + +public class HashStaticTests extends ESTestCase { + + public void testInvalidAlgorithmLiteral() { + Source source = new Source(0, 0, "hast(\"invalid\", input)"); + DriverContext driverContext = driverContext(); + InvalidArgumentException e = expectThrows( + InvalidArgumentException.class, + () -> evaluator( + new Hash(source, new Literal(source, new BytesRef("invalid"), DataType.KEYWORD), field("input", DataType.KEYWORD)) + ).get(driverContext) + ); + assertThat(e.getMessage(), startsWith("invalid algorithm for [hast(\"invalid\", input)]: invalid MessageDigest not available")); + } + + /** + * The following fields and 
methods were borrowed from AbstractScalarFunctionTestCase + */ + private final List breakers = Collections.synchronizedList(new ArrayList<>()); + + private DriverContext driverContext() { + BigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, ByteSizeValue.ofMb(256)).withCircuitBreaking(); + CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST); + breakers.add(breaker); + return new DriverContext(bigArrays, new BlockFactory(breaker, bigArrays)); + } + + @After + public void allMemoryReleased() { + for (CircuitBreaker breaker : breakers) { + assertThat(breaker.getUsed(), equalTo(0L)); + } + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java index f2c3fba95219..c5cdf97eccd1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java @@ -12,10 +12,8 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.lucene.BytesRefs; -import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; @@ -32,7 +30,6 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; -import static org.hamcrest.Matchers.startsWith; public class HashTests extends AbstractScalarFunctionTestCase { @@ 
-107,16 +104,4 @@ private static String hash(String algorithm, String input) { protected Expression build(Source source, List args) { return new Hash(source, args.get(0), args.get(1)); } - - public void testInvalidAlgorithmLiteral() { - Source source = new Source(0, 0, "hast(\"invalid\", input)"); - DriverContext driverContext = driverContext(); - InvalidArgumentException e = expectThrows( - InvalidArgumentException.class, - () -> evaluator( - new Hash(source, new Literal(source, new BytesRef("invalid"), DataType.KEYWORD), field("input", DataType.KEYWORD)) - ).get(driverContext) - ); - assertThat(e.getMessage(), startsWith("invalid algorithm for [hast(\"invalid\", input)]: invalid MessageDigest not available")); - } }