Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQL: Test and fix the NULL handling of the String functions #68379

Merged
merged 14 commits into from
Feb 5, 2021
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/reference/sql/functions/string.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ CONCAT(

*Output*: string

*Description*: Returns a character string that is the result of concatenating `string_exp1` to `string_exp2`. If one of the string is `NULL`, the other string will be returned.
*Description*: Returns a character string that is the result of concatenating `string_exp1` to `string_exp2`. `NULL` input strings are treated as empty strings.

[source, sql]
--------------------------------------------------
Expand Down Expand Up @@ -228,7 +228,7 @@ LOCATE(

*Output*: integer

*Description*: Returns the starting position of the first occurrence of `pattern` within `source`. The search for the first occurrence of `pattern` begins with the first character position in `source` unless the optional argument, `start`, is specified. If `start` is specified, the search begins with the character position indicated by the value of `start`. The first character position in `source` is indicated by the value 1. If `pattern` is not found within `source`, the value 0 is returned.
*Description*: Returns the starting position of the first occurrence of `pattern` within `source`. The optional `start` specifies the character position to start the search with. The first character position in `source` is indicated by the value 1. Not specifying `start` or specifying it as `NULL`, any negative value, 0 or 1, starts the search at the first character position. If `pattern` is not found within `source`, the value 0 is returned.

[source, sql]
--------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.sql.qa.single_node;

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.xpack.sql.qa.jdbc.JdbcIntegrationTestCase;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.LinkedHashMap;
Expand All @@ -23,7 +25,6 @@
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Stream;

import static java.lang.String.join;
import static java.util.Arrays.asList;
Expand All @@ -33,16 +34,16 @@
import static org.hamcrest.collection.IsEmptyCollection.empty;

/**
* <p>This test was introduced because of the inconsistencies regarding NULL argument handling. NULL literal vs NULL field
* <p>This test was introduced because of the inconsistencies regarding NULL argument handling. NULL literal vs NULL field
* value as function arguments in some case result in different function return values.</p>
*
* <p>Functions should return with the same value no matter if the argument(s) came from a field or from a literal.</p>
*
* <p>The test class based on the example function calls (and argument specifications) generates all the
* permutations of the function arguments (4 options per argument: value/NULL as literal/field) and tests that the
* function calls with the same argument values provide the same result regardless of the source (literal, field)
*
* <p>The test class based on the example function calls (and argument specifications) generates all the
* permutations of the function arguments (4 options per argument: value/NULL as literal/field) and tests that the
* function calls with the same argument values provide the same result regardless of the source (literal, field)
* of the arguments.</p>
*
*
* <p>To ignore any of the tests, add an .ignore() method call after the Fn ctors in the FUNCTION_CALLS_TO_TEST list below, like:
* <code> new Fn("ASCII", "foobar").ignore()</code></p>
*/
Expand Down Expand Up @@ -73,51 +74,51 @@ public class ConsistentFunctionArgHandlingIT extends JdbcIntegrationTestCase {
new Fn("UCASE", "foobar")
);

private static final List<String> NON_TESTED_FUNCTIONS = asList(
"CURDATE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP", "CURTIME", "DATEADD", "DATEDIFF", "DATEPART",
"DATETIME_FORMAT", "DATETIME_PARSE", "DATETRUNC", "DATE_ADD", "DATE_DIFF", "DATE_PARSE", "DATE_PART", "DATE_TRUNC", "DAY",
"DAYNAME", "DAYOFMONTH", "DAYOFWEEK", "DAYOFYEAR", "DAY_NAME", "DAY_OF_MONTH", "DAY_OF_WEEK", "DAY_OF_YEAR", "DOM", "DOW", "DOY",
"FORMAT", "HOUR", "HOUR_OF_DAY", "IDOW", "ISODAYOFWEEK", "ISODOW", "ISOWEEK", "ISOWEEKOFYEAR", "ISO_DAY_OF_WEEK",
"ISO_WEEK_OF_YEAR", "IW", "IWOY", "MINUTE", "MINUTE_OF_DAY", "MINUTE_OF_HOUR", "MONTH", "MONTHNAME", "MONTH_NAME",
"MONTH_OF_YEAR", "NOW", "QUARTER", "SECOND", "SECOND_OF_MINUTE", "TIMESTAMPADD", "TIMESTAMPDIFF", "TIMESTAMP_ADD",
"TIMESTAMP_DIFF", "TIME_PARSE", "TODAY", "TO_CHAR", "WEEK", "WEEK_OF_YEAR", "YEAR", "ABS", "ACOS", "ASIN", "ATAN",
"ATAN2", "CBRT", "CEIL", "CEILING", "COS", "COSH", "COT", "DEGREES", "E", "EXP", "EXPM1", "FLOOR", "LOG", "LOG10",
"MOD", "PI", "POWER", "RADIANS", "RAND", "RANDOM", "ROUND", "SIGN", "SIGNUM", "SIN", "SINH", "SQRT", "TAN", "TRUNC",
"TRUNCATE", "CAST", "CONVERT", "DATABASE", "USER", "ST_ASTEXT", "ST_ASWKT", "ST_DISTANCE",
"ST_GEOMETRYTYPE", "ST_GEOMFROMTEXT", "ST_WKTTOSQL", "ST_X", "ST_Y", "ST_Z");

private enum Source { FIELD, LITERAL }
private static final List<String> NON_TESTED_FUNCTIONS;
static {
try {
Class<?> c = ConsistentFunctionArgHandlingIT.class;
NON_TESTED_FUNCTIONS = Files.readAllLines(Path.of(c.getResource(c.getSimpleName() + "-non-tested-functions.txt").toURI()));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be great if it's possible get all the functions from FunctionRegistry:
Something like: https://github.com/elastic/elasticsearch/blob/master/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java#L2199

Can also be done at a later point and not as part of this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SqlFunctionRegistry is not available in integration tests (can add it as a dependency, but I don't think we really need it). I get the list of the functions from SHOW FUNCTIONS see here, that achieves pretty much the same that SqlFunctionRegistry would (for more see the last paragraph here). The purpose of the NON_TESTED_FUNCTIONS list is to identify functions added after writing this test class which likely we should think about adding to the tested function list / non tested function list (one-liner). Newly added = SHOW FUNCTIONS \ FUNCTIONS_CALLS \ NON_TESTED_FUNCTIONS should be an empty set. The NON_TESTED_FUNCTIONS list should shrink later once the datetime, ... functions are also added to the tested function list (separate PR).

} catch (Exception ex) {
throw new RuntimeException(ex);
}
}

private enum Source {
FIELD,
LITERAL
}

private static class Fn {
private final String name;
private final List<Argument> arguments;
private List<String> aliases = new ArrayList<>();
private boolean ignored = false;

private Fn(String name, Object... arguments) {
this.name = name;
this.arguments = new ArrayList<>();
for (Object a : arguments) {
this.arguments.add(Argument.class.isAssignableFrom(a.getClass()) ? (Argument) a : new Argument(a));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When can there already be an Argument sent as a parameter to this constructor? I am only seeing non-Argument instances being used....

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously I used for one of the cases (LOCATE). Now this is unused and can be removed, but might come handy if we have a restriction that some of the arguments cannot come from fields.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@palesz I would suggest removing it and use it when needed. Someone looking at the code gets confused about a functionality that's not actually there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed.

}
}

public Fn aliases(String... aliases) {
this.aliases = asList(aliases);
return this;
}
}

public Fn ignore() {
this.ignored = true;
return this;
}

@Override
public String toString() {
return name + "(" + arguments.stream().map(a -> String.valueOf(a.exampleValue)).collect(joining(", ")) + ")";
}
}

private static class Argument {
private final Object exampleValue;
private final Source[] acceptedSources;
Expand All @@ -127,27 +128,27 @@ private Argument(Object exampleValue, Source... acceptedSources) {
this.acceptedSources = acceptedSources.length == 0 ? Source.values() : acceptedSources;
}
}

@ParametersFactory
public static Iterable<Object[]> testFactory() {
List<Object[]> tests = new ArrayList<>();
tests.add(new Object[]{ null });
FUNCTION_CALLS_TO_TEST.forEach(f -> tests.add(new Object[]{ f }));
tests.add(new Object[] { null });
FUNCTION_CALLS_TO_TEST.forEach(f -> tests.add(new Object[] { f }));
return tests;
}

private final Fn fn;

public ConsistentFunctionArgHandlingIT(Fn fn) {
this.fn = fn;
}

public void test() throws Exception {
if (fn == null) {
checkScalarFunctionCoverage();
return;
}

assumeFalse("Ignored", fn.ignored);

// create a record for the function, where all the example (non-null) argument values are stored in fields
Expand All @@ -157,20 +158,20 @@ public void test() throws Exception {
final String argPrefix = "arg_" + functionName + "_";
final String nullArgPrefix = "arg_null_" + functionName + "_";
final String testDocId = functionName + "_" + UUIDs.base64UUID();

indexTestDocForFunction(functionName, indexName, argPrefix, nullArgPrefix, testDocId);

List<List<Object>> possibleValuesPerArguments = fn.arguments.stream().map(a -> asList(a.exampleValue, null)).collect(toList());
List<List<Source>> acceptedSourcesPerArguments = fn.arguments.stream().map(a -> asList(a.acceptedSources)).collect(toList());

iterateAllPermutations(possibleValuesPerArguments, argValues -> {
// we only want to check the function calls that have at least a single NULL argument
if (argValues.stream().noneMatch(Objects::isNull)) {
return;
}

List<Tuple<String, Object>> results = new ArrayList<>();

iterateAllPermutations(acceptedSourcesPerArguments, argSources -> {
List<String> functionCallArgs = new ArrayList<>();
List<String> functionCallArgsForAssert = new ArrayList<>();
Expand All @@ -193,18 +194,17 @@ public void test() throws Exception {
final String functionCall = functionName + "(" + join(", ", functionCallArgs) + ")";
final String query = "SELECT " + functionCall + " FROM " + indexName + " WHERE docId = '" + testDocId + "'";
ResultSet retVal = esJdbc().createStatement().executeQuery(query);

assertTrue(retVal.next());
results.add(tuple(functionName + "(" + join(", ", functionCallArgsForAssert) + ")", retVal.getObject(1)));
// only a single row should be returned
assertFalse(retVal.next());

Stream<Object> returnValueStream = results.stream().map(Tuple::v2);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you remove this and now go over the stream components twice?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sadly streams can only be iterated once, during the second iteration it gave stream is already closed exception.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did the tests pass then?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the second iteration only happens if the test finds an issue (to create the detailed message for error).

if (returnValueStream.distinct().count() > 1) {
int maxResultWidth = returnValueStream.mapToInt(o -> asLiteralInQuery(o).length()).max().orElse(20);
if (results.stream().map(Tuple::v2).distinct().count() > 1) {
int maxResultWidth = results.stream().map(Tuple::v2).mapToInt(o -> asLiteralInQuery(o).length()).max().orElse(20);
String resultsAsString = results.stream()
.map(r -> String.format(Locale.ROOT, "%2$-" + maxResultWidth + "s // %1$s", r.v1(), asLiteralInQuery(r.v2())))
.collect(joining("\n"));
.map(r -> String.format(Locale.ROOT, "%2$-" + maxResultWidth + "s // %1$s", r.v1(), asLiteralInQuery(r.v2())))
.collect(joining("\n"));
fail("The result of the last call differs from the other calls:\n" + resultsAsString);
}
});
Expand Down Expand Up @@ -246,9 +246,8 @@ private void checkScalarFunctionCoverage() throws Exception {
functions.removeAll(fn.aliases);
}
functions.removeAll(NON_TESTED_FUNCTIONS);

assertThat("Missing function checks: [" + functions.stream().map(s -> "\"" + s + "\"").collect(joining(", ")) + "]",
functions, empty());

assertThat("Some functions are not covered by this test", functions, empty());
}

private static String asLiteralInQuery(Object argValue) {
Expand All @@ -264,9 +263,9 @@ private static String asLiteralInQuery(Object argValue) {
return argInQuery;
}

private static <T> void iterateAllPermutations(List<List<T>> possibleValuesPerItem, CheckedConsumer<List<T>, Exception> consumer)
private static <T> void iterateAllPermutations(List<List<T>> possibleValuesPerItem, CheckedConsumer<List<T>, Exception> consumer)
throws Exception {

if (possibleValuesPerItem.isEmpty()) {
consumer.accept(new ArrayList<>());
return;
Expand All @@ -280,6 +279,5 @@ private static <T> void iterateAllPermutations(List<List<T>> possibleValuesPerIt
}
});
}

}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
CURDATE
CURRENT_DATE
CURRENT_TIME
CURRENT_TIMESTAMP
CURTIME
DATEADD
DATEDIFF
DATEPART
DATETIME_FORMAT
DATETIME_PARSE
DATETRUNC
DATE_ADD
DATE_DIFF
DATE_PARSE
DATE_PART
DATE_TRUNC
DAY
DAYNAME
DAYOFMONTH
DAYOFWEEK
DAYOFYEAR
DAY_NAME
DAY_OF_MONTH
DAY_OF_WEEK
DAY_OF_YEAR
DOM
DOW
DOY
FORMAT
HOUR
HOUR_OF_DAY
IDOW
ISODAYOFWEEK
ISODOW
ISOWEEK
ISOWEEKOFYEAR
ISO_DAY_OF_WEEK
ISO_WEEK_OF_YEAR
IW
IWOY
MINUTE
MINUTE_OF_DAY
MINUTE_OF_HOUR
MONTH
MONTHNAME
MONTH_NAME
MONTH_OF_YEAR
NOW
QUARTER
SECOND
SECOND_OF_MINUTE
TIMESTAMPADD
TIMESTAMPDIFF
TIMESTAMP_ADD
TIMESTAMP_DIFF
TIME_PARSE
TODAY
TO_CHAR
WEEK
WEEK_OF_YEAR
YEAR
ABS
ACOS
ASIN
ATAN
ATAN2
CBRT
CEIL
CEILING
COS
COSH
COT
DEGREES
E
EXP
EXPM1
FLOOR
LOG
LOG10
MOD
PI
POWER
RADIANS
RAND
RANDOM
ROUND
SIGN
SIGNUM
SIN
SINH
SQRT
TAN
TRUNC
TRUNCATE
CAST
CONVERT
DATABASE
USER
ST_ASTEXT
ST_ASWKT
ST_DISTANCE
ST_GEOMETRYTYPE
ST_GEOMFROMTEXT
ST_WKTTOSQL
ST_X
ST_Y
ST_Z