Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ESQL: LTRIM, RTRIM and fix unicode whitespace #98548

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/reference/esql/esql-functions.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ these functions:
* <<esql-is_nan>>
* <<esql-length>>
* <<esql-log10>>
* <<esql-ltrim>>
* <<esql-rtrim>>
* <<esql-mv_avg>>
* <<esql-mv_concat>>
* <<esql-mv_count>>
Expand Down Expand Up @@ -85,6 +87,8 @@ include::functions/is_infinite.asciidoc[]
include::functions/is_nan.asciidoc[]
include::functions/length.asciidoc[]
include::functions/log10.asciidoc[]
include::functions/ltrim.asciidoc[]
include::functions/rtrim.asciidoc[]
include::functions/mv_avg.asciidoc[]
include::functions/mv_concat.asciidoc[]
include::functions/mv_count.asciidoc[]
Expand Down
12 changes: 12 additions & 0 deletions docs/reference/esql/functions/ltrim.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[[esql-ltrim]]
=== `LTRIM`
Removes leading whitespace from strings.

[source.merge.styled,esql]
----
include::{esql-specs}/string.csv-spec[tag=ltrim]
----
[%header.monospaced.styled,format=dsv,separator=|]
|===
include::{esql-specs}/string.csv-spec[tag=ltrim-result]
|===
12 changes: 12 additions & 0 deletions docs/reference/esql/functions/rtrim.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[[esql-rtrim]]
=== `RTRIM`
Removes trailing whitespace from strings.

[source.merge.styled,esql]
----
include::{esql-specs}/string.csv-spec[tag=rtrim]
----
[%header.monospaced.styled,format=dsv,separator=|]
|===
include::{esql-specs}/string.csv-spec[tag=rtrim-result]
|===
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ is_infinite |is_infinite(arg1)
is_nan |is_nan(arg1)
length |length(arg1)
log10 |log10(arg1)
ltrim |ltrim(arg1)
max |max(arg1)
median |median(arg1)
median_absolute_deviation|median_absolute_deviation(arg1)
Expand All @@ -51,6 +52,7 @@ percentile |percentile(arg1, arg2)
pi |pi()
pow |pow(arg1, arg2)
round |round(arg1, arg2)
rtrim |rtrim(arg1)
sin |sin(arg1)
sinh |sinh(arg1)
split |split(arg1, arg2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,25 +165,91 @@ emp_no:integer | last_name:keyword | x:keyword | z:keyword
10010 | Piveteau | P | a
;

ltrim
from employees | sort emp_no | limit 10 | eval name = concat(" ", first_name, " ") | eval name = ltrim(name) | eval name = concat("'", name, "'") | keep emp_no, name;

emp_no:integer | name:keyword
10001 | 'Georgi '
10002 | 'Bezalel '
10003 | 'Parto '
10004 | 'Chirstian '
10005 | 'Kyoichi '
10006 | 'Anneke '
10007 | 'Tzvetan '
10008 | 'Saniya '
10009 | 'Sumant '
10010 | 'Duangkaew '
;

ltrimRow
// tag::ltrim[]
ROW message = " some text ", color = " red "
| EVAL message = LTRIM(message)
| EVAL color = LTRIM(color)
| EVAL message = CONCAT("'", message, "'")
| EVAL color = CONCAT("'", color, "'")
// end::ltrim[]
;

// tag::ltrim-result[]
message:keyword | color:keyword
'some text ' | 'red '
// end::ltrim-result[]
;

rtrim
from employees | sort emp_no | limit 10 | eval name = concat(" ", first_name, " ") | eval name = rtrim(name) | eval name = concat("'", name, "'") | keep emp_no, name;

emp_no:integer | name:keyword
10001 | ' Georgi'
10002 | ' Bezalel'
10003 | ' Parto'
10004 | ' Chirstian'
10005 | ' Kyoichi'
10006 | ' Anneke'
10007 | ' Tzvetan'
10008 | ' Saniya'
10009 | ' Sumant'
10010 | ' Duangkaew'
;

rtrimRow
// tag::rtrim[]
ROW message = " some text ", color = " red "
| EVAL message = RTRIM(message)
| EVAL color = RTRIM(color)
| EVAL message = CONCAT("'", message, "'")
| EVAL color = CONCAT("'", color, "'")
// end::rtrim[]
;

// tag::rtrim-result[]
message:keyword | color:keyword
' some text' | ' red'
// end::rtrim-result[]
;

trim
from employees | sort emp_no | limit 10 | eval name = concat(" ", first_name) | eval name = trim(first_name) | keep emp_no, name;
from employees | sort emp_no | limit 10 | eval name = concat(" ", first_name, " ") | eval name = trim(name) | eval name = concat("'", name, "'") | keep emp_no, name;

emp_no:integer | name:keyword
10001 | Georgi
10002 | Bezalel
10003 | Parto
10004 | Chirstian
10005 | Kyoichi
10006 | Anneke
10007 | Tzvetan
10008 | Saniya
10009 | Sumant
10010 | Duangkaew
10001 | 'Georgi'
10002 | 'Bezalel'
10003 | 'Parto'
10004 | 'Chirstian'
10005 | 'Kyoichi'
10006 | 'Anneke'
10007 | 'Tzvetan'
10008 | 'Saniya'
10009 | 'Sumant'
10010 | 'Duangkaew'
;

trimRow
// tag::trim[]
ROW message = " some text ", color = " red "| EVAL message = TRIM(message)| EVAL color = TRIM(color)
ROW message = " some text ", color = " red "
| EVAL message = TRIM(message)
| EVAL color = TRIM(color)
// end::trim[]
;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License
// 2.0; you may not use this file except in compliance with the Elastic License
// 2.0.
package org.elasticsearch.xpack.esql.expression.function.scalar.string;

import java.lang.Override;
import java.lang.String;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.BytesRefVector;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.EvalOperator;

/**
 * {@link EvalOperator.ExpressionEvaluator} implementation for {@link LTrim}.
 * This class is generated. Do not edit it.
 */
public final class LTrimEvaluator implements EvalOperator.ExpressionEvaluator {
  private final EvalOperator.ExpressionEvaluator val;

  public LTrimEvaluator(EvalOperator.ExpressionEvaluator val) {
    this.val = val;
  }

  /**
   * Evaluates the wrapped expression against {@code page} and applies
   * {@link LTrim#process} to every resulting value.
   *
   * @param page the page handed to the wrapped evaluator
   * @return a constant-null block when the input is entirely null, otherwise a block of processed values
   */
  @Override
  public Block eval(Page page) {
    Block evaluated = val.eval(page);
    int positions = page.getPositionCount();
    if (evaluated.areAllValuesNull()) {
      return Block.constantNullBlock(positions);
    }
    BytesRefBlock asBytesRefs = (BytesRefBlock) evaluated;
    BytesRefVector asVector = asBytesRefs.asVector();
    // Use the vector overload whenever the block exposes a vector view; otherwise go position by position.
    return asVector != null ? eval(positions, asVector).asBlock() : eval(positions, asBytesRefs);
  }

  /**
   * Block variant: a position that is null or does not hold exactly one value produces a null entry.
   *
   * @param positionCount number of positions to process
   * @param valBlock input values
   * @return a block with one entry per position
   */
  public BytesRefBlock eval(int positionCount, BytesRefBlock valBlock) {
    BytesRefBlock.Builder builder = BytesRefBlock.newBlockBuilder(positionCount);
    BytesRef scratch = new BytesRef();
    for (int pos = 0; pos < positionCount; pos++) {
      boolean singleValue = valBlock.isNull(pos) == false && valBlock.getValueCount(pos) == 1;
      if (singleValue) {
        int valueIndex = valBlock.getFirstValueIndex(pos);
        builder.appendBytesRef(LTrim.process(valBlock.getBytesRef(valueIndex, scratch)));
      } else {
        builder.appendNull();
      }
    }
    return builder.build();
  }

  /**
   * Vector variant: processes every position without any null or value-count checks.
   *
   * @param positionCount number of positions to process
   * @param valVector input values
   * @return a vector with one entry per position
   */
  public BytesRefVector eval(int positionCount, BytesRefVector valVector) {
    BytesRefVector.Builder builder = BytesRefVector.newVectorBuilder(positionCount);
    BytesRef scratch = new BytesRef();
    int pos = 0;
    while (pos < positionCount) {
      builder.appendBytesRef(LTrim.process(valVector.getBytesRef(pos, scratch)));
      pos++;
    }
    return builder.build();
  }

  @Override
  public String toString() {
    return "LTrimEvaluator[val=" + val + "]";
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License
// 2.0; you may not use this file except in compliance with the Elastic License
// 2.0.
package org.elasticsearch.xpack.esql.expression.function.scalar.string;

import java.lang.Override;
import java.lang.String;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.BytesRefVector;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.EvalOperator;

/**
 * {@link EvalOperator.ExpressionEvaluator} implementation for {@link RTrim}.
 * This class is generated. Do not edit it.
 */
public final class RTrimEvaluator implements EvalOperator.ExpressionEvaluator {
  private final EvalOperator.ExpressionEvaluator val;

  public RTrimEvaluator(EvalOperator.ExpressionEvaluator val) {
    this.val = val;
  }

  /**
   * Evaluates the wrapped expression against {@code page} and applies
   * {@link RTrim#process} to every resulting value.
   *
   * @param page the page handed to the wrapped evaluator
   * @return a constant-null block when the input is entirely null, otherwise a block of processed values
   */
  @Override
  public Block eval(Page page) {
    Block evaluated = val.eval(page);
    int positions = page.getPositionCount();
    if (evaluated.areAllValuesNull()) {
      return Block.constantNullBlock(positions);
    }
    BytesRefBlock asBytesRefs = (BytesRefBlock) evaluated;
    BytesRefVector asVector = asBytesRefs.asVector();
    // Use the vector overload whenever the block exposes a vector view; otherwise go position by position.
    return asVector != null ? eval(positions, asVector).asBlock() : eval(positions, asBytesRefs);
  }

  /**
   * Block variant: a position that is null or does not hold exactly one value produces a null entry.
   *
   * @param positionCount number of positions to process
   * @param valBlock input values
   * @return a block with one entry per position
   */
  public BytesRefBlock eval(int positionCount, BytesRefBlock valBlock) {
    BytesRefBlock.Builder builder = BytesRefBlock.newBlockBuilder(positionCount);
    BytesRef scratch = new BytesRef();
    for (int pos = 0; pos < positionCount; pos++) {
      boolean singleValue = valBlock.isNull(pos) == false && valBlock.getValueCount(pos) == 1;
      if (singleValue) {
        int valueIndex = valBlock.getFirstValueIndex(pos);
        builder.appendBytesRef(RTrim.process(valBlock.getBytesRef(valueIndex, scratch)));
      } else {
        builder.appendNull();
      }
    }
    return builder.build();
  }

  /**
   * Vector variant: processes every position without any null or value-count checks.
   *
   * @param positionCount number of positions to process
   * @param valVector input values
   * @return a vector with one entry per position
   */
  public BytesRefVector eval(int positionCount, BytesRefVector valVector) {
    BytesRefVector.Builder builder = BytesRefVector.newVectorBuilder(positionCount);
    BytesRef scratch = new BytesRef();
    int pos = 0;
    while (pos < positionCount) {
      builder.appendBytesRef(RTrim.process(valVector.getBytesRef(pos, scratch)));
      pos++;
    }
    return builder.build();
  }

  @Override
  public String toString() {
    return "RTrimEvaluator[val=" + val + "]";
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvMin;
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSum;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.RTrim;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Split;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.StartsWith;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring;
Expand Down Expand Up @@ -129,6 +131,8 @@ private FunctionDefinition[][] functions() {
def(Length.class, Length::new, "length"),
def(Substring.class, Substring::new, "substring"),
def(Concat.class, Concat::new, "concat"),
def(LTrim.class, LTrim::new, "ltrim"),
def(RTrim.class, RTrim::new, "rtrim"),
def(Trim.class, Trim::new, "trim"),
def(StartsWith.class, StartsWith::new, "starts_with") },
// date
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.expression.function.scalar.string;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.compute.ann.Evaluator;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction;
import org.elasticsearch.xpack.esql.planner.Mappable;
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.expression.TypeResolutions;
import org.elasticsearch.xpack.ql.tree.NodeInfo;
import org.elasticsearch.xpack.ql.tree.Source;

import java.util.List;
import java.util.function.Function;
import java.util.function.Supplier;

import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isString;

/**
 * Scalar function that strips leading whitespace from a string.
 * Whitespace is whatever {@link Character#isWhitespace(int)} reports for each decoded code point.
 */
public class LTrim extends UnaryScalarFunction implements Mappable {
    public LTrim(Source source, Expression field) {
        super(source, field);
    }

    /**
     * Requires the single argument to be a string once the children are resolved.
     */
    @Override
    protected TypeResolution resolveType() {
        if (childrenResolved()) {
            return isString(field, sourceText(), TypeResolutions.ParamOrdinal.DEFAULT);
        }
        return new Expression.TypeResolution("Unresolved children");
    }

    /**
     * Delegates folding to the default provided by {@link Mappable}.
     */
    @Override
    public Object fold() {
        return Mappable.super.fold();
    }

    /**
     * Builds a supplier of {@link LTrimEvaluator}s wrapping the evaluator of this function's argument.
     *
     * @param toEvaluator maps an expression to a supplier of its evaluator
     * @return a supplier creating a fresh {@link LTrimEvaluator} on each call
     */
    @Override
    public Supplier<EvalOperator.ExpressionEvaluator> toEvaluator(
        Function<Expression, Supplier<EvalOperator.ExpressionEvaluator>> toEvaluator
    ) {
        final Supplier<EvalOperator.ExpressionEvaluator> argumentEvaluator = toEvaluator.apply(field());
        return () -> new LTrimEvaluator(argumentEvaluator.get());
    }

    @Override
    public Expression replaceChildren(List<Expression> newChildren) {
        Expression onlyChild = newChildren.get(0);
        return new LTrim(source(), onlyChild);
    }

    @Override
    protected NodeInfo<? extends Expression> info() {
        return NodeInfo.create(this, LTrim::new, field());
    }

    /**
     * Skips leading whitespace code points of {@code val} and returns the remainder.
     *
     * @param val the bytes to trim; read between {@code val.offset} and {@code val.offset + val.length}
     * @return a new {@link BytesRef} built over {@code val.bytes}, starting at the first
     *         non-whitespace code point (zero-length when every code point is whitespace)
     */
    @Evaluator
    static BytesRef process(final BytesRef val) {
        final int end = val.offset + val.length;
        int start = val.offset;
        UnicodeUtil.UTF8CodePoint decoded = new UnicodeUtil.UTF8CodePoint();
        while (start < end) {
            decoded = UnicodeUtil.codePointAt(val.bytes, start, decoded);
            if (Character.isWhitespace(decoded.codePoint)) {
                // Advance by the full encoded width so multi-byte code points are skipped whole.
                start += decoded.numBytes;
            } else {
                break;
            }
        }
        return new BytesRef(val.bytes, start, end - start);
    }
}
Loading