Skip to content

Commit

Permalink
SQL: Add multi_value_field_leniency inside FieldHitExtractor (#40113)
Browse files Browse the repository at this point in the history
For cases where fields can have multi values, allow the behavior to be
customized through a dedicated configuration field.
By default this will be enabled on the drivers so that existing datasets
work instead of throwing an exception.
For regular SQL usage, the behavior is false so that the user is aware
of the underlying data.

Fix #39700
  • Loading branch information
costin authored Mar 18, 2019
1 parent ad90055 commit 2b35157
Show file tree
Hide file tree
Showing 27 changed files with 278 additions and 116 deletions.
4 changes: 4 additions & 0 deletions docs/reference/sql/endpoints/jdbc.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ Query timeout (in seconds). That is the maximum amount of time waiting for a que

`proxy.socks`:: SOCKS proxy host name

[float]
==== Mapping
`field.multi.value.leniency` (default `true`):: Whether to be lenient and return the first value for fields with multiple values (true) or throw an exception.

[float]
==== Additional

Expand Down
4 changes: 4 additions & 0 deletions docs/reference/sql/endpoints/rest.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,10 @@ More information available https://docs.oracle.com/javase/8/docs/api/java/time/Z
|false
|Return the results in a columnar fashion, rather than row-based fashion. Valid for `json`, `yaml`, `cbor` and `smile`.

|field_multi_value_leniency
|false
|Throw an exception when encountering multiple values for a field (default) or be lenient and return the first value from the list (without any guarantees of what that will be - typically the first in natural ascending order).

|===

Do note that most parameters (outside the timeout and `columnar` ones) make sense only during the initial query - any follow-up pagination request only requires the `cursor` parameter as explained in the <<sql-pagination, pagination>> chapter.
Expand Down
2 changes: 2 additions & 0 deletions docs/reference/sql/limitations.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ pagination taking place on the **root nested document and not on its inner hits*

Array fields are not supported due to the "invisible" way in which {es} handles an array of values: the mapping doesn't indicate whether
a field is an array (has multiple values) or not, so without reading all the data, {es-sql} cannot know whether a field is a single or multi value.
When multiple values are returned for a field, by default, {es-sql} will throw an exception. However, it is possible to change this behavior through `field_multi_value_leniency` parameter in REST (disabled by default) or
`field.multi.value.leniency` in drivers (enabled by default).

[float]
=== Sorting by aggregation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,25 @@ class JdbcConfiguration extends ConnectionConfiguration {
// can be out/err/url
static final String DEBUG_OUTPUT_DEFAULT = "err";

public static final String TIME_ZONE = "timezone";
static final String TIME_ZONE = "timezone";
// follow the JDBC spec and use the JVM default...
// to avoid inconsistency, the default is picked up once at startup and reused across connections
// to cater to the principle of least surprise
// really, the way to move forward is to specify a calendar or the timezone manually
static final String TIME_ZONE_DEFAULT = TimeZone.getDefault().getID();

static final String FIELD_MULTI_VALUE_LENIENCY = "field.multi.value.leniency";
static final String FIELD_MULTI_VALUE_LENIENCY_DEFAULT = "true";


// options that don't change at runtime
private static final Set<String> OPTION_NAMES = new LinkedHashSet<>(Arrays.asList(TIME_ZONE, DEBUG, DEBUG_OUTPUT));
private static final Set<String> OPTION_NAMES = new LinkedHashSet<>(
Arrays.asList(TIME_ZONE, FIELD_MULTI_VALUE_LENIENCY, DEBUG, DEBUG_OUTPUT));

static {
// trigger version initialization
// typically this should have already happened but in case the
// JdbcDriver/JdbcDataSource are not used and the impl. classes used directly
// EsDriver/EsDataSource are not used and the impl. classes used directly
// this covers that case
Version.CURRENT.toString();
}
Expand All @@ -71,6 +76,7 @@ class JdbcConfiguration extends ConnectionConfiguration {

// mutable ones
private ZoneId zoneId;
private boolean fieldMultiValueLeniency;

public static JdbcConfiguration create(String u, Properties props, int loginTimeoutSeconds) throws JdbcSQLException {
URI uri = parseUrl(u);
Expand Down Expand Up @@ -151,6 +157,8 @@ private JdbcConfiguration(URI baseURI, String u, Properties props) throws JdbcSQ

this.zoneId = parseValue(TIME_ZONE, props.getProperty(TIME_ZONE, TIME_ZONE_DEFAULT),
s -> TimeZone.getTimeZone(s).toZoneId().normalized());
this.fieldMultiValueLeniency = parseValue(FIELD_MULTI_VALUE_LENIENCY,
props.getProperty(FIELD_MULTI_VALUE_LENIENCY, FIELD_MULTI_VALUE_LENIENCY_DEFAULT), Boolean::parseBoolean);
}

@Override
Expand All @@ -174,6 +182,10 @@ public TimeZone timeZone() {
return zoneId != null ? TimeZone.getTimeZone(zoneId) : null;
}

public boolean fieldMultiValueLeniency() {
return fieldMultiValueLeniency;
}

public static boolean canAccept(String url) {
return (StringUtils.hasText(url) && url.trim().startsWith(JdbcConfiguration.URL_PREFIX));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,15 @@ boolean ping(long timeoutInMs) throws SQLException {

Cursor query(String sql, List<SqlTypedParamValue> params, RequestMeta meta) throws SQLException {
int fetch = meta.fetchSize() > 0 ? meta.fetchSize() : conCfg.pageSize();
SqlQueryRequest sqlRequest = new SqlQueryRequest(sql, params, null, conCfg.zoneId(),
SqlQueryRequest sqlRequest = new SqlQueryRequest(sql, params, conCfg.zoneId(),
fetch,
TimeValue.timeValueMillis(meta.timeoutInMs()), TimeValue.timeValueMillis(meta.queryTimeoutInMs()),
false, new RequestInfo(Mode.JDBC));
TimeValue.timeValueMillis(meta.timeoutInMs()),
TimeValue.timeValueMillis(meta.queryTimeoutInMs()),
null,
Boolean.FALSE,
null,
new RequestInfo(Mode.JDBC),
conCfg.fieldMultiValueLeniency());
SqlQueryResponse response = httpClient.query(sqlRequest);
return new DefaultCursor(this, response.cursor(), toJdbcColumnInfo(response.columns()), response.rows(), meta);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,33 @@ public static String elasticsearchAddress() {
}

public Connection esJdbc() throws SQLException {
return randomBoolean() ? useDriverManager() : useDataSource();
return esJdbc(connectionProperties());
}

public Connection esJdbc(Properties props) throws SQLException {
return createConnection(props);
}

protected Connection createConnection(Properties connectionProperties) throws SQLException {
String elasticsearchAddress = getProtocol() + "://" + elasticsearchAddress();
String address = "jdbc:es://" + elasticsearchAddress;
Connection connection = null;
if (randomBoolean()) {
connection = DriverManager.getConnection(address, connectionProperties);
} else {
EsDataSource dataSource = new EsDataSource();
dataSource.setUrl(address);
dataSource.setProperties(connectionProperties);
connection = dataSource.getConnection();
}

assertNotNull("The timezone should be specified", connectionProperties.getProperty("timezone"));
return connection;
}

//
// methods below are used inside the documentation only
//
protected Connection useDriverManager() throws SQLException {
String elasticsearchAddress = getProtocol() + "://" + elasticsearchAddress();
// tag::connect-dm
Expand Down Expand Up @@ -114,6 +138,8 @@ protected String clusterName() {
protected Properties connectionProperties() {
Properties connectionProperties = new Properties();
connectionProperties.put("timezone", randomKnownTimeZone());
// in the tests, don't be lenient towards multi values
connectionProperties.put("field.multi.value.leniency", "false");
return connectionProperties;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
import org.elasticsearch.common.CheckedBiFunction;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.CheckedFunction;
import org.elasticsearch.common.CheckedSupplier;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.sql.jdbc.EsDataSource;
import org.elasticsearch.xpack.sql.jdbc.EsType;

import java.io.IOException;
Expand All @@ -22,7 +22,6 @@
import java.sql.Blob;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.NClob;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
Expand Down Expand Up @@ -80,6 +79,34 @@ public class ResultSetTestCase extends JdbcIntegrationTestCase {
dateTimeTestingFields.put(new Tuple<String, Object>("test_keyword", "true"), EsType.KEYWORD);
}

public void testMultiValueFieldWithMultiValueLeniencyEnabled() throws Exception {
createTestDataForMultiValueTests();

doWithQuery(() -> esWithLeniency(true), "SELECT int, keyword FROM test", (results) -> {
results.next();
Object number = results.getObject(1);
Object string = results.getObject(2);
assertEquals(-10, number);
assertEquals("-10", string);
assertFalse(results.next());
});
}

public void testMultiValueFieldWithMultiValueLeniencyDisabled() throws Exception {
createTestDataForMultiValueTests();

SQLException expected = expectThrows(SQLException.class,
() -> doWithQuery(() -> esWithLeniency(false), "SELECT int, keyword FROM test", (results) -> {
}));
assertTrue(expected.getMessage().contains("Arrays (returned by [int]) are not supported"));

// default has multi value disabled
expected = expectThrows(SQLException.class,
() -> doWithQuery(() -> esJdbc(), "SELECT int, keyword FROM test", (results) -> {
}));

}

// Byte values testing
public void testGettingValidByteWithoutCasting() throws Exception {
byte random1 = randomByte();
Expand Down Expand Up @@ -1132,7 +1159,7 @@ public void testValidGetObjectCalls() throws Exception {
/*
* Checks StackOverflowError fix for https://github.com/elastic/elasticsearch/pull/31735
*/
public void testNoInfiniteRecursiveGetObjectCalls() throws SQLException, IOException {
public void testNoInfiniteRecursiveGetObjectCalls() throws Exception {
index("library", "1", builder -> {
builder.field("name", "Don Quixote");
builder.field("page_count", 1072);
Expand Down Expand Up @@ -1303,17 +1330,16 @@ public void testUnsupportedUpdateMethods() throws IOException, SQLException {
}

private void doWithQuery(String query, CheckedConsumer<ResultSet, SQLException> consumer) throws SQLException {
try (Connection connection = esJdbc()) {
try (PreparedStatement statement = connection.prepareStatement(query)) {
try (ResultSet results = statement.executeQuery()) {
consumer.accept(results);
}
}
}
doWithQuery(() -> esJdbc(), query, consumer);
}

private void doWithQueryAndTimezone(String query, String tz, CheckedConsumer<ResultSet, SQLException> consumer) throws SQLException {
try (Connection connection = esJdbc(tz)) {
doWithQuery(() -> esJdbc(tz), query, consumer);
}

private void doWithQuery(CheckedSupplier<Connection, SQLException> con, String query, CheckedConsumer<ResultSet, SQLException> consumer)
throws SQLException {
try (Connection connection = con.get()) {
try (PreparedStatement statement = connection.prepareStatement(query)) {
try (ResultSet results = statement.executeQuery()) {
consumer.accept(results);
Expand Down Expand Up @@ -1355,7 +1381,29 @@ protected static void updateMapping(String index, CheckedConsumer<XContentBuilde
client().performRequest(request);
}

private void createTestDataForByteValueTests(byte random1, byte random2, byte random3) throws Exception, IOException {
private void createTestDataForMultiValueTests() throws Exception {
createIndex("test");
updateMapping("test", builder -> {
builder.startObject("int").field("type", "integer").endObject();
builder.startObject("keyword").field("type", "keyword").endObject();
});

Integer[] values = randomArray(3, 15, s -> new Integer[s], () -> Integer.valueOf(randomInt(50)));
// add the minimal value in the middle yet the test will pick it up since the results are sorted
values[2] = Integer.valueOf(-10);

String[] stringValues = new String[values.length];
for (int i = 0; i < values.length; i++) {
stringValues[i] = String.valueOf(values[i]);
}

index("test", "1", builder -> {
builder.array("int", (Object[]) values);
builder.array("keyword", stringValues);
});
}

private void createTestDataForByteValueTests(byte random1, byte random2, byte random3) throws Exception {
createIndex("test");
updateMapping("test", builder -> {
builder.startObject("test_byte").field("type", "byte").endObject();
Expand All @@ -1373,7 +1421,7 @@ private void createTestDataForByteValueTests(byte random1, byte random2, byte ra
});
}

private void createTestDataForShortValueTests(short random1, short random2, short random3) throws Exception, IOException {
private void createTestDataForShortValueTests(short random1, short random2, short random3) throws Exception {
createIndex("test");
updateMapping("test", builder -> {
builder.startObject("test_short").field("type", "short").endObject();
Expand All @@ -1391,7 +1439,7 @@ private void createTestDataForShortValueTests(short random1, short random2, shor
});
}

private void createTestDataForIntegerValueTests(int random1, int random2, int random3) throws Exception, IOException {
private void createTestDataForIntegerValueTests(int random1, int random2, int random3) throws Exception {
createIndex("test");
updateMapping("test", builder -> {
builder.startObject("test_integer").field("type", "integer").endObject();
Expand All @@ -1409,7 +1457,7 @@ private void createTestDataForIntegerValueTests(int random1, int random2, int ra
});
}

private void createTestDataForLongValueTests(long random1, long random2, long random3) throws Exception, IOException {
private void createTestDataForLongValueTests(long random1, long random2, long random3) throws Exception {
createIndex("test");
updateMapping("test", builder -> {
builder.startObject("test_long").field("type", "long").endObject();
Expand All @@ -1427,7 +1475,7 @@ private void createTestDataForLongValueTests(long random1, long random2, long ra
});
}

private void createTestDataForDoubleValueTests(double random1, double random2, double random3) throws Exception, IOException {
private void createTestDataForDoubleValueTests(double random1, double random2, double random3) throws Exception {
createIndex("test");
updateMapping("test", builder -> {
builder.startObject("test_double").field("type", "double").endObject();
Expand All @@ -1445,7 +1493,7 @@ private void createTestDataForDoubleValueTests(double random1, double random2, d
});
}

private void createTestDataForFloatValueTests(float random1, float random2, float random3) throws Exception, IOException {
private void createTestDataForFloatValueTests(float random1, float random2, float random3) throws Exception {
createIndex("test");
updateMapping("test", builder -> {
builder.startObject("test_float").field("type", "float").endObject();
Expand Down Expand Up @@ -1481,7 +1529,7 @@ private void indexSimpleDocumentWithTrueValues(Long randomLongDate) throws IOExc
* Creates test data for all numeric get* methods. All values random and different from the other numeric fields already generated.
* It returns a map containing the field name and its randomly generated value to be later used in checking the returned values.
*/
private Map<String,Number> createTestDataForNumericValueTypes(Supplier<Number> randomGenerator) throws Exception, IOException {
private Map<String, Number> createTestDataForNumericValueTypes(Supplier<Number> randomGenerator) throws Exception {
Map<String,Number> map = new HashMap<>();
createIndex("test");
updateMappingForNumericValuesTests("test");
Expand Down Expand Up @@ -1575,31 +1623,19 @@ private Double getMaxLongPlusOne() {
}

private Connection esJdbc(String timeZoneId) throws SQLException {
return randomBoolean() ? useDriverManager(timeZoneId) : useDataSource(timeZoneId);
}

private Connection useDriverManager(String timeZoneId) throws SQLException {
String elasticsearchAddress = getProtocol() + "://" + elasticsearchAddress();
String address = "jdbc:es://" + elasticsearchAddress;
Properties connectionProperties = connectionProperties();
connectionProperties.put(JDBC_TIMEZONE, timeZoneId);
Connection connection = DriverManager.getConnection(address, connectionProperties);

Connection connection = esJdbc(connectionProperties);
assertNotNull("The timezone should be specified", connectionProperties.getProperty(JDBC_TIMEZONE));
return connection;
}

private Connection useDataSource(String timeZoneId) throws SQLException {
String elasticsearchAddress = getProtocol() + "://" + elasticsearchAddress();
EsDataSource dataSource = new EsDataSource();
String address = "jdbc:es://" + elasticsearchAddress;
dataSource.setUrl(address);
private Connection esWithLeniency(boolean multiValueLeniency) throws SQLException {
String property = "field.multi.value.leniency";
Properties connectionProperties = connectionProperties();
connectionProperties.put(JDBC_TIMEZONE, timeZoneId);
dataSource.setProperties(connectionProperties);
Connection connection = dataSource.getConnection();

assertNotNull("The timezone should be specified", connectionProperties.getProperty(JDBC_TIMEZONE));
connectionProperties.setProperty(property, Boolean.toString(multiValueLeniency));
Connection connection = esJdbc(connectionProperties);
assertNotNull("The leniency should be specified", connectionProperties.getProperty(property));
return connection;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public AbstractSqlQueryRequest() {
}

public AbstractSqlQueryRequest(String query, List<SqlTypedParamValue> params, QueryBuilder filter, ZoneId zoneId,
int fetchSize, TimeValue requestTimeout, TimeValue pageTimeout, RequestInfo requestInfo) {
int fetchSize, TimeValue requestTimeout, TimeValue pageTimeout, RequestInfo requestInfo) {
super(requestInfo);
this.query = query;
this.params = params;
Expand Down
Loading

0 comments on commit 2b35157

Please sign in to comment.