Skip to content

Commit

Permalink
add mv_find
Browse files Browse the repository at this point in the history
  • Loading branch information
fang-xing-esql committed Mar 12, 2024
1 parent 92f6197 commit 3818a1b
Show file tree
Hide file tree
Showing 12 changed files with 487 additions and 3 deletions.
2 changes: 2 additions & 0 deletions docs/reference/esql/functions/mv-functions.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* <<esql-mv_concat>>
* <<esql-mv_count>>
* <<esql-mv_dedupe>>
* <<esql-mv_find>>
* <<esql-mv_first>>
* <<esql-mv_last>>
* <<esql-mv_max>>
Expand All @@ -26,6 +27,7 @@ include::mv_avg.asciidoc[]
include::mv_concat.asciidoc[]
include::mv_count.asciidoc[]
include::mv_dedupe.asciidoc[]
include::mv_find.asciidoc[]
include::mv_first.asciidoc[]
include::mv_last.asciidoc[]
include::mv_max.asciidoc[]
Expand Down
35 changes: 35 additions & 0 deletions docs/reference/esql/functions/mv_find.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[discrete]
[[esql-mv_find]]
=== `MV_FIND`

*Syntax*

[.text-center]
image::esql/functions/signature/mv_find.svg[Embedded,opts=inline]

*Parameters*

`field`::
Multivalue expression.

`pattern`::
Regular expression. Must be a constant.

*Description*

Returns the zero-based index of the first value in a multivalued field that contains a match for a regular expression. Returns `null` if no value matches.

*Supported types*

include::types/mv_find.asciidoc[]

*Example*

[source.merge.styled,esql]
----
include::{esql-specs}/string.csv-spec[tag=mv_find]
----
[%header.monospaced.styled,format=dsv,separator=|]
|===
include::{esql-specs}/string.csv-spec[tag=mv_find-result]
|===
1 change: 1 addition & 0 deletions docs/reference/esql/functions/signature/mv_find.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 6 additions & 0 deletions docs/reference/esql/functions/types/mv_find.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[%header.monospaced.styled,format=dsv,separator=|]
|===
field | pattern | result
keyword | keyword | integer
text | text | integer
|===
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ mv_avg |"double mv_avg(field:double|integer|long|unsigned_long
mv_concat |"keyword mv_concat(v:text|keyword, delim:text|keyword)" |[v, delim] |["text|keyword", "text|keyword"] |["values to join", "delimiter"] |keyword | "Reduce a multivalued string field to a single valued field by concatenating all values." | [false, false] | false | false
mv_count |"integer mv_count(v:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version)" |v | "boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version" | "" | integer | "Reduce a multivalued field to a single valued field containing the count of values." | false | false | false
mv_dedupe |"boolean|date|double|integer|ip|keyword|long|text|version mv_dedupe(v:boolean|date|double|integer|ip|keyword|long|text|version)" |v | "boolean|date|double|integer|ip|keyword|long|text|version" | "" |"boolean|date|double|integer|ip|keyword|long|text|version" | "Remove duplicate values from a multivalued field." | false | false | false
mv_find |"integer mv_find(field:keyword|text, pattern:keyword|text)" |[field, pattern] | ["keyword|text", "keyword|text"] | [A multivalued field, A regular expression] | "integer" | "Returns the index for the first value in a multivalued field that matches a regular expression." | [false, false] | false | false
mv_first |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version mv_first(v:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version)" |v | "boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version" | "" |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version" | "Reduce a multivalued field to a single valued field containing the first value." | false | false | false
mv_last |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version mv_last(v:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version)" |v | "boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version" | "" |"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version" | "Reduce a multivalued field to a single valued field containing the last value." | false | false | false
mv_max |"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version mv_max(v:boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version)" |v | "boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version" | "" |"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version" | "Reduce a multivalued field to a single valued field containing the maximum value." | false | false | false
Expand Down Expand Up @@ -150,6 +151,7 @@ double e()
"keyword mv_concat(v:text|keyword, delim:text|keyword)"
"integer mv_count(v:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version)"
"boolean|date|double|integer|ip|keyword|long|text|version mv_dedupe(v:boolean|date|double|integer|ip|keyword|long|text|version)"
"integer mv_find(field:keyword|text, pattern:keyword|text)"
"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version mv_first(v:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version)"
"boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version mv_last(v:boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|unsigned_long|version)"
"boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version mv_max(v:boolean|date|double|integer|ip|keyword|long|text|unsigned_long|version)"
Expand Down Expand Up @@ -228,5 +230,5 @@ countFunctions#[skip:-8.13.99]
show functions | stats a = count(*), b = count(*), c = count(*) | mv_expand c;

a:long | b:long | c:long
94 | 94 | 94
95 | 95 | 95
;
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,33 @@ emp_no:integer | full_name:keyword | full_name_2:keyword | job_positions:keyword
10005 | Kyoichi Maliniak | Maliniak,Kyoichi | null | [-2.14,13.07] | [-2.14,13.07]
;

mvFind#[skip:-8.13.99, reason:newly added in 8.14]
// tag::mv_find[]
ROW a = ["Facello", "Simmel", "Bamford", "Koblick", "Maliniak"]
| eval match = mv_find(a, "m+")
// end::mv_find[]
;

// tag::mv_find-result[]
a:keyword | match:integer
["Facello", "Simmel", "Bamford", "Koblick", "Maliniak"] | 1
// end::mv_find-result[]
;

mvFindEmp#[skip:-8.13.99, reason:newly added in 8.14]
from employees
| eval match = mv_find(job_positions, "Senior")
| sort emp_no
| keep emp_no, job_positions, match
| limit 3
;

emp_no:integer | job_positions:keyword | match:integer
10001 | [Accountant, Senior Python Developer] | 1
10002 | Senior Team Lead | 0
10003 | null | null
;

showTextFields
from hosts | where host == "beta" | keep host, host_group, description;
ignoreOrder:true
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License
// 2.0; you may not use this file except in compliance with the Elastic License
// 2.0.
package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue;

import java.lang.Override;
import java.lang.String;
import java.util.regex.Pattern;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.IntBlock;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.xpack.esql.expression.function.Warnings;
import org.elasticsearch.xpack.ql.tree.Source;

/**
 * {@link EvalOperator.ExpressionEvaluator} implementation for {@link MvFind}.
 * This class is generated. Do not edit it.
 * <p>
 * Evaluates the {@code field} expression for a page and, position by position, hands the
 * resulting block to {@link MvFind#process} together with the pre-compiled pattern.
 */
public final class MvFindEvaluator implements EvalOperator.ExpressionEvaluator {
    private final Warnings warnings;

    private final EvalOperator.ExpressionEvaluator field;

    private final Pattern pattern;

    private final DriverContext driverContext;

    /**
     * @param source        source of the function call, used to build the {@link Warnings} holder
     * @param field         evaluator that produces the block to search
     * @param pattern       compiled regular expression passed to every {@link MvFind#process} call
     * @param driverContext supplies the block factory used to build the result
     */
    public MvFindEvaluator(Source source, EvalOperator.ExpressionEvaluator field, Pattern pattern, DriverContext driverContext) {
        this.field = field;
        this.pattern = pattern;
        this.driverContext = driverContext;
        this.warnings = new Warnings(source);
    }

    /**
     * Evaluates {@code field} against the page and computes one result position per page position.
     * The intermediate field block is closed once the result has been built.
     */
    @Override
    public Block eval(Page page) {
        try (BytesRefBlock values = (BytesRefBlock) field.eval(page)) {
            int rows = page.getPositionCount();
            return eval(rows, values);
        }
    }

    /**
     * Builds the result block: a null for every position where {@code fieldBlock} is null,
     * otherwise whatever {@link MvFind#process} appends for that position.
     */
    public IntBlock eval(int positionCount, BytesRefBlock fieldBlock) {
        try (IntBlock.Builder out = driverContext.blockFactory().newIntBlockBuilder(positionCount)) {
            for (int row = 0; row < positionCount; row++) {
                if (fieldBlock.isNull(row)) {
                    // A null input position yields a null output without consulting the pattern.
                    out.appendNull();
                } else {
                    MvFind.process(out, row, fieldBlock, pattern);
                }
            }
            return out.build();
        }
    }

    @Override
    public String toString() {
        return "MvFindEvaluator[" + "field=" + field + ", pattern=" + pattern + "]";
    }

    /** Closes the wrapped {@code field} evaluator. */
    @Override
    public void close() {
        Releasables.closeExpectNoException(field);
    }

    /**
     * Creates {@link MvFindEvaluator} instances; the same pattern instance is passed to each of them.
     */
    static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
        private final Source source;

        private final EvalOperator.ExpressionEvaluator.Factory field;

        private final Pattern pattern;

        public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory field, Pattern pattern) {
            this.source = source;
            this.field = field;
            this.pattern = pattern;
        }

        @Override
        public MvFindEvaluator get(DriverContext context) {
            EvalOperator.ExpressionEvaluator fieldEvaluator = field.get(context);
            return new MvFindEvaluator(source, fieldEvaluator, pattern, context);
        }

        @Override
        public String toString() {
            return "MvFindEvaluator[" + "field=" + field + ", pattern=" + pattern + "]";
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvConcat;
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvCount;
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvDedupe;
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvFind;
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvFirst;
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvLast;
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvMax;
Expand Down Expand Up @@ -209,6 +210,7 @@ private FunctionDefinition[][] functions() {
def(MvConcat.class, MvConcat::new, "mv_concat"),
def(MvCount.class, MvCount::new, "mv_count"),
def(MvDedupe.class, MvDedupe::new, "mv_dedupe"),
def(MvFind.class, MvFind::new, "mv_find"),
def(MvFirst.class, MvFirst::new, "mv_first"),
def(MvLast.class, MvLast::new, "mv_last"),
def(MvMax.class, MvMax::new, "mv_max"),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.ann.Evaluator;
import org.elasticsearch.compute.ann.Fixed;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.IntBlock;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.xpack.esql.capabilities.Validatable;
import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.ql.InvalidArgumentException;
import org.elasticsearch.xpack.ql.common.Failures;
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.expression.function.scalar.ScalarFunction;
import org.elasticsearch.xpack.ql.expression.gen.script.ScriptTemplate;
import org.elasticsearch.xpack.ql.tree.NodeInfo;
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.type.DataType;
import org.elasticsearch.xpack.ql.type.DataTypes;

import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.elasticsearch.xpack.esql.expression.Validations.isFoldable;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.FIRST;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isString;

/**
 * Returns the zero-based index of the first value in a multivalued field that contains a match for a
 * regular expression, or {@code null} when none of the values match.
 * <p>
 * The pattern argument has to be foldable (enforced by {@link #validate(Failures)}) so that it can be
 * compiled once, in {@link #toEvaluator(Function)}, and handed to the evaluator as a fixed value.
 */
public class MvFind extends ScalarFunction implements EvaluatorMapper, Validatable {
    private final Expression field, pattern;

    /**
     * @param source  source of the function call
     * @param field   the string expression whose values are searched
     * @param pattern the regular expression to search for
     */
    @FunctionInfo(
        returnType = { "integer" },
        description = "Returns the index for the first value in a multivalued field that matches a regular expression."
    )
    public MvFind(
        Source source,
        @Param(name = "field", type = { "keyword", "text" }, description = "A multivalued field") Expression field,
        @Param(name = "pattern", type = { "keyword", "text" }, description = "A regular expression") Expression pattern
    ) {
        super(source, Arrays.asList(field, pattern));
        this.field = field;
        this.pattern = pattern;
    }

    /** Both arguments must be strings; the first failing argument determines the reported resolution. */
    @Override
    protected TypeResolution resolveType() {
        if (childrenResolved() == false) {
            return new TypeResolution("Unresolved children");
        }

        TypeResolution resolution = isString(field, sourceText(), FIRST);
        if (resolution.unresolved()) {
            return resolution;
        }

        return isString(pattern, sourceText(), SECOND);
    }

    @Override
    public boolean foldable() {
        return field.foldable() && pattern.foldable();
    }

    /**
     * Compiles the folded pattern and wires it, together with the evaluator for {@code field}, into an
     * {@link MvFindEvaluator.Factory}.
     *
     * @return the constant-null factory when the pattern is not foldable or folds to {@code null},
     *         otherwise a factory for {@link MvFindEvaluator}
     * @throws InvalidArgumentException if the folded pattern is not a valid regular expression
     */
    @Override
    public EvalOperator.ExpressionEvaluator.Factory toEvaluator(
        Function<Expression, EvalOperator.ExpressionEvaluator.Factory> toEvaluator
    ) {
        if (pattern.foldable() == false) {
            // Non-constant patterns are reported by validate(); there is nothing to compile here.
            return EvalOperator.CONSTANT_NULL_FACTORY;
        }

        Object folded = pattern.fold();
        if (folded == null) {
            // A null pattern cannot match anything; without this guard the cast-and-decode below
            // would fail with a NullPointerException.
            return EvalOperator.CONSTANT_NULL_FACTORY;
        }

        // Only the compilation is guarded, so that an IllegalArgumentException raised while building
        // the field evaluator is not misreported as an invalid regular expression.
        Pattern compiled;
        try {
            compiled = Pattern.compile(((BytesRef) folded).utf8ToString());
        } catch (IllegalArgumentException e) {
            throw new InvalidArgumentException(e, "invalid regular expression for [{}]: {}", sourceText(), e.getMessage());
        }
        return new MvFindEvaluator.Factory(source(), toEvaluator.apply(field), compiled);
    }

    @Override
    public Object fold() {
        return EvaluatorMapper.super.fold();
    }

    @Override
    public Expression replaceChildren(List<Expression> newChildren) {
        return new MvFind(source(), newChildren.get(0), newChildren.get(1));
    }

    @Override
    protected NodeInfo<? extends Expression> info() {
        return NodeInfo.create(this, MvFind::new, field, pattern);
    }

    @Override
    public DataType dataType() {
        return DataTypes.INTEGER;
    }

    @Override
    public ScriptTemplate asScript() {
        throw new UnsupportedOperationException("functions do not support scripting");
    }

    @Override
    public int hashCode() {
        return Objects.hash(field, pattern);
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == null || obj.getClass() != getClass()) {
            return false;
        }
        MvFind other = (MvFind) obj;
        return Objects.equals(other.field, field) && Objects.equals(other.pattern, pattern);
    }

    /**
     * Appends to {@code builder} the zero-based offset, within {@code position}, of the first value in
     * which {@code pattern} finds a match, or a null when no value at that position matches.
     * Uses {@link Matcher#find()}, so a match anywhere inside the value is sufficient.
     */
    @Evaluator
    static void process(IntBlock.Builder builder, int position, BytesRefBlock field, @Fixed Pattern pattern) {
        int fieldValueCount = field.getValueCount(position);
        int first = field.getFirstValueIndex(position);
        BytesRef fieldScratch = new BytesRef();
        for (int i = 0; i < fieldValueCount; i++) {
            Matcher matcher = pattern.matcher(field.getBytesRef(first + i, fieldScratch).utf8ToString());
            if (matcher.find()) {
                // Report the offset relative to this position, not the absolute index in the block.
                builder.appendInt(i);
                return;
            }
        }
        builder.appendNull();
    }

    /** Records a failure when the pattern argument is not foldable. */
    @Override
    public void validate(Failures failures) {
        String operation = sourceText();
        failures.add(isFoldable(pattern, operation, SECOND));
    }
}
Loading

0 comments on commit 3818a1b

Please sign in to comment.