Skip to content

Commit

Permalink
SQL: Implement TRIM function (#57518)
Browse files Browse the repository at this point in the history
Add `TRIM` function which combines the functionality of both
`LTRIM` and `RTRIM` by stripping both leading and trailing
whitespaces.

Refers to #41195
  • Loading branch information
matriv authored Jun 3, 2020
1 parent a933867 commit 6c86c91
Show file tree
Hide file tree
Showing 13 changed files with 200 additions and 12 deletions.
1 change: 1 addition & 0 deletions docs/reference/sql/functions/index.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@
** <<sql-functions-string-rtrim>>
** <<sql-functions-string-space>>
** <<sql-functions-string-substring>>
** <<sql-functions-string-trim>>
** <<sql-functions-string-ucase>>
* <<sql-functions-type-conversion>>
** <<sql-functions-type-conversion-cast>>
Expand Down
20 changes: 20 additions & 0 deletions docs/reference/sql/functions/string.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,26 @@ SUBSTRING(
--------------------------------------------------
include-tagged::{sql-specs}/docs/docs.csv-spec[stringSubString]
--------------------------------------------------
[[sql-functions-string-trim]]
==== `TRIM`

.Synopsis:
[source, sql]
--------------------------------------------------
TRIM(string_exp) <1>
--------------------------------------------------
*Input*:

<1> string expression

*Output*: string

*Description*: Returns the characters of `string_exp`, with leading and trailing blanks removed.

[source, sql]
--------------------------------------------------
include-tagged::{sql-specs}/docs/docs.csv-spec[stringTrim]
--------------------------------------------------

[[sql-functions-string-ucase]]
==== `UCASE`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ RIGHT |SCALAR
RTRIM |SCALAR
SPACE |SCALAR
STARTS_WITH |SCALAR
SUBSTRING |SCALAR
SUBSTRING |SCALAR
TRIM |SCALAR
UCASE |SCALAR
CAST |SCALAR
CONVERT |SCALAR
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,8 @@ RIGHT |SCALAR
RTRIM |SCALAR
SPACE |SCALAR
STARTS_WITH |SCALAR
SUBSTRING |SCALAR
SUBSTRING |SCALAR
TRIM |SCALAR
UCASE |SCALAR
CAST |SCALAR
CONVERT |SCALAR
Expand Down Expand Up @@ -1842,6 +1843,16 @@ Elastic
// end::stringSubString
;

stringTrim
// tag::stringTrim
SELECT TRIM(' Elastic ') AS trimmed;

trimmed
--------------
Elastic
// end::stringTrim
;

stringUCase
// tag::stringUCase
SELECT UCASE('Elastic');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,12 @@ SELECT SUBSTRING('Elasticsearch', 1, 15) sub;
substringInline3
SELECT SUBSTRING('Elasticsearch', 10, 10) sub;

trimFilter
SELECT TRIM(CONCAT(CONCAT(' ', first_name), ' ')) trimmed FROM "test_emp" WHERE TRIM(CONCAT(CONCAT(' ', first_name), ' ')) = 'Bob';

trimInline
SELECT TRIM(' Elastic ') trimmed1, TRIM(' ') trimmed2;

ucaseFilter
SELECT UCASE(gender) uppercased, COUNT(*) count FROM "test_emp" WHERE UCASE(gender) = 'F' GROUP BY UCASE(gender);

Expand Down Expand Up @@ -188,6 +194,17 @@ SELECT RTRIM(first_name) rt FROM "test_emp" GROUP BY RTRIM(first_name) HAVING CO
ltrimGroupByAndOrderBy
SELECT LTRIM(first_name) lt FROM "test_emp" GROUP BY LTRIM(first_name) HAVING COUNT(*)>1;

trimOrderBy
SELECT TRIM(CONCAT(CONCAT(' ', first_name), ' ')) trimmed FROM "test_emp" ORDER BY 1;

trimGroupBy
SELECT TRIM(CONCAT(CONCAT(' ', first_name), ' ')) trimmed FROM "test_emp" GROUP BY TRIM(CONCAT(CONCAT(' ', first_name), ' ')) ORDER BY 1;

// Having on MAX/MIN(<string>) not supported: https://github.com/elastic/elasticsearch/issues/37938
trimGroupByAndHaving-Ignore
SELECT MAX(CONCAT(CONCAT(' ', first_name), ' ')) max trimmed FROM "test_emp" GROUP BY gender HAVING MAX(CONCAT(CONCAT(' ', gender), ' ')) = 'Zvonko' ORDER BY 1;


spaceGroupByWithCharLength
SELECT CAST(CHAR_LENGTH(SPACE(languages)) AS INT) cls FROM "test_emp" GROUP BY CHAR_LENGTH(SPACE(languages)) ORDER BY CHAR_LENGTH(SPACE(languages)) ASC;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
import org.elasticsearch.xpack.sql.expression.function.scalar.string.Right;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.Space;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.Substring;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.Trim;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.UCase;
import org.elasticsearch.xpack.sql.expression.predicate.conditional.Case;
import org.elasticsearch.xpack.sql.expression.predicate.conditional.Coalesce;
Expand Down Expand Up @@ -243,6 +244,7 @@ private static FunctionDefinition[][] functions() {
def(Space.class, Space::new, "SPACE"),
def(StartsWith.class, StartsWith::new, "STARTS_WITH"),
def(Substring.class, Substring::new, "SUBSTRING"),
def(Trim.class, Trim::new, "TRIM"),
def(UCase.class, UCase::new, "UCASE")
},
// DataType conversion
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

import static org.elasticsearch.common.Strings.hasLength;

abstract class StringFunctionUtils {
final class StringFunctionUtils {

private StringFunctionUtils() {}

/**
* Extract a substring from the given string, using start index and length of the extracted substring.
Expand Down Expand Up @@ -41,7 +43,7 @@ static String substring(String s, int start, int length) {
* @return the resulting String
*/
static String trimTrailingWhitespaces(String s) {
if (!hasLength(s)) {
if (hasLength(s) == false) {
return s;
}

Expand All @@ -60,7 +62,7 @@ static String trimTrailingWhitespaces(String s) {
* @return the resulting String
*/
static String trimLeadingWhitespaces(String s) {
if (!hasLength(s)) {
if (hasLength(s) == false) {
return s;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public enum StringOperation {
LENGTH((String s) -> StringFunctionUtils.trimTrailingWhitespaces(s).length()),
RTRIM((String s) -> StringFunctionUtils.trimTrailingWhitespaces(s)),
LTRIM((String s) -> StringFunctionUtils.trimLeadingWhitespaces(s)),
TRIM(String::trim),
SPACE((Number n) -> {
int i = n.intValue();
if (i < 0) {
Expand Down Expand Up @@ -141,4 +142,4 @@ public int hashCode() {
public String toString() {
return processor.toString();
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.scalar.string;

import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.tree.NodeInfo;
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.type.DataType;
import org.elasticsearch.xpack.ql.type.DataTypes;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;

/**
* Trims both leading and trailing whitespaces.
*/
public class Trim extends UnaryStringFunction {

public Trim(Source source, Expression field) {
super(source, field);
}

@Override
protected NodeInfo<Trim> info() {
return NodeInfo.create(this, Trim::new, field());
}

@Override
protected Trim replaceChild(Expression newChild) {
return new Trim(source(), newChild);
}

@Override
protected StringOperation operation() {
return StringOperation.TRIM;
}

@Override
public DataType dataType() {
return DataTypes.KEYWORD;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,10 @@ public static String substring(String s, Number start, Number length) {
return (String) SubstringFunctionProcessor.doProcess(s, start, length);
}

public static String trim(String s) {
return (String) StringOperation.TRIM.apply(s);
}

public static String ucase(String s) {
return (String) StringOperation.UCASE.apply(s);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ class org.elasticsearch.xpack.sql.expression.function.scalar.whitelist.InternalS
String rtrim(String)
String space(Number)
String substring(String, Number, Number)
String trim(String)
String ucase(String)

#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,10 @@ public void testRTrim() {
assertNull(proc.process(null));
assertEquals("foo bar", proc.process("foo bar"));
assertEquals("", proc.process(""));
assertEquals("", proc.process(" "));
assertEquals("foo bar", proc.process("foo bar "));
assertEquals(" foo bar", proc.process(" foo bar "));
assertEquals("", proc.process(withRandomWhitespaces(" \t \r\n \n ", true, true)));
assertEquals("foo bar", proc.process(withRandomWhitespaces("foo bar", false, true)));
assertEquals(" foo bar", proc.process(withRandomWhitespaces(" foo bar", false, true)));
assertEquals(" \t \n \r\n foo \t \r\n \n bar", proc.process(withRandomWhitespaces(" \t \n \r\n foo \t \r\n \n bar", false, true)));
assertEquals("f", proc.process('f'));

stringCharInputValidation(proc);
Expand All @@ -161,9 +162,25 @@ public void testLTrim() {
assertNull(proc.process(null));
assertEquals("foo bar", proc.process("foo bar"));
assertEquals("", proc.process(""));
assertEquals("", proc.process(" "));
assertEquals("foo bar", proc.process(" foo bar"));
assertEquals("foo bar ", proc.process(" foo bar "));
assertEquals("", proc.process(withRandomWhitespaces(" \t \r\n \n ", true, true)));
assertEquals("foo bar", proc.process(withRandomWhitespaces("foo bar", true, false)));
assertEquals("foo bar ", proc.process(withRandomWhitespaces("foo bar ", true, false)));
assertEquals("foo \t \r\n \n bar \t \r\n \n ", proc.process(withRandomWhitespaces("foo \t \r\n \n bar \t \r\n \n ", true, false)));
assertEquals("f", proc.process('f'));

stringCharInputValidation(proc);
}

public void testTrim() {
StringProcessor proc = new StringProcessor(StringOperation.TRIM);
assertNull(proc.process(null));
assertEquals("foo bar", proc.process("foo bar"));
assertEquals("", proc.process(""));
assertEquals("", proc.process(withRandomWhitespaces(" \t \r\n \n ", true, true)));
assertEquals("foo bar", proc.process(withRandomWhitespaces("foo bar", true, false)));
assertEquals("foo bar", proc.process(withRandomWhitespaces("foo bar", false, true)));
assertEquals("foo bar", proc.process(withRandomWhitespaces("foo bar",true, true)));
assertEquals("foo \t \r\n \n bar", proc.process(withRandomWhitespaces("foo \t \r\n \n bar", true, true)));
assertEquals("f", proc.process('f'));

stringCharInputValidation(proc);
Expand Down Expand Up @@ -215,4 +232,22 @@ public void testOctetLength() {

stringCharInputValidation(proc);
}

private static String withRandomWhitespaces(String str, boolean prefix, boolean suffix) {
if (prefix == false && suffix == false) {
return str;
}

StringBuilder sb = new StringBuilder(str);
int noWhitespaces = randomIntBetween(1, 100);
for (int i = 0; i < noWhitespaces; i++) {
if (prefix) {
sb.insert(0, randomFrom(" ", "\t", "\n", "\r", "\r\n"));
}
if (suffix) {
sb.append(randomFrom(" ", "\t", "\n", "\r", "\r\n"));
}
}
return sb.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@
import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DateTimeProcessor.DateTimeExtractor;
import org.elasticsearch.xpack.sql.expression.function.scalar.math.MathProcessor.MathOperation;
import org.elasticsearch.xpack.sql.expression.function.scalar.math.Round;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.LTrim;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.RTrim;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.Trim;
import org.elasticsearch.xpack.sql.expression.function.scalar.string.UnaryStringFunction;
import org.elasticsearch.xpack.sql.optimizer.Optimizer;
import org.elasticsearch.xpack.sql.parser.SqlParser;
import org.elasticsearch.xpack.sql.plan.physical.EsQueryExec;
Expand Down Expand Up @@ -741,6 +745,52 @@ public void testStartsWithUsesPrefixQueryAndScript() {
assertEquals("[{v=keyword}, {v=xyz}, {v=true}]", sq.script().params().toString());
}

@SuppressWarnings("unchecked")
public void testTrim_WhereClause_Painless() {
Class<? extends UnaryStringFunction> trimFunction = randomFrom(Trim.class, LTrim.class, RTrim.class);
String trimFunctionName = trimFunction.getSimpleName().toUpperCase(Locale.ROOT);
LogicalPlan p = plan("SELECT " + trimFunctionName + "(keyword) trimmed FROM test WHERE " + trimFunctionName + "(keyword) = 'foo'");

assertTrue(p instanceof Project);
p = ((Project) p).child();
assertTrue(p instanceof Filter);
Expression condition = ((Filter) p).condition();
QueryTranslation qt = translate(condition);
assertTrue(qt.query instanceof ScriptQuery);
ScriptQuery sc = (ScriptQuery) qt.query;
assertEquals("InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.eq(InternalSqlScriptUtils." +
trimFunctionName.toLowerCase(Locale.ROOT) + "(InternalQlScriptUtils.docValue(doc,params.v0)),params.v1))",
sc.script().toString());
assertEquals("[{v=keyword}, {v=foo}]", sc.script().params().toString());
}

@SuppressWarnings("unchecked")
public void testTrim_GroupBy_Painless() {
Class<? extends UnaryStringFunction> trimFunction = randomFrom(Trim.class, LTrim.class, RTrim.class);
String trimFunctionName = trimFunction.getSimpleName().toUpperCase(Locale.ROOT);
LogicalPlan p = plan("SELECT " + trimFunctionName + "(keyword) trimmed, count(*) FROM test GROUP BY " +
trimFunctionName + "(keyword)");

assertEquals(Aggregate.class, p.getClass());
Aggregate agg = (Aggregate) p;
assertEquals(1, agg.groupings().size());
assertEquals(2, agg.aggregates().size());
assertEquals(trimFunction, agg.groupings().get(0).getClass());
assertEquals(trimFunction, ((Alias) agg.aggregates().get(0)).child().getClass());
assertEquals(Count.class,((Alias) agg.aggregates().get(1)).child().getClass());

UnaryStringFunction trim = (UnaryStringFunction) agg.groupings().get(0);
assertEquals(1, trim.children().size());

GroupingContext groupingContext = QueryFolder.FoldAggregate.groupBy(agg.groupings());
assertNotNull(groupingContext);
ScriptTemplate scriptTemplate = groupingContext.tail.script();
assertEquals("InternalSqlScriptUtils." + trimFunctionName.toLowerCase(Locale.ROOT) +
"(InternalQlScriptUtils.docValue(doc,params.v0))",
scriptTemplate.toString());
assertEquals("[{v=keyword}]", scriptTemplate.params().toString());
}

public void testTranslateNotExpression_WhereClause_Painless() {
LogicalPlan p = plan("SELECT * FROM test WHERE NOT(POSITION('x', keyword) = 0)");
assertTrue(p instanceof Project);
Expand Down

0 comments on commit 6c86c91

Please sign in to comment.