From c8d7883b64007fd92d145e7b04ae1b9d040ecafe Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Fri, 19 Feb 2021 10:49:29 +0100 Subject: [PATCH] SQL: Verify binary fields found in non-project to have the doc_values (#69128) (#69257) This adds a verifier rule to check that any fields used in filtering, aggregations or ordering has the doc_values. Otherwise the query will either fail in ES with a less obvious and more verbose reason OR plainly give wrong results if filtering with `IS [NOT] NULL`. (cherry picked from commit 1b9ec88007586b6923409531dbfe78807dba76a5) --- .../mapping-multi-field-with-nested.json | 2 + .../xpack/sql/qa/rest/RestSqlTestCase.java | 49 +++++++++++++++++++ .../xpack/sql/analysis/analyzer/Verifier.java | 20 ++++++++ .../analyzer/VerifierErrorMessagesTests.java | 18 +++++++ .../logical/command/sys/SysColumnsTests.java | 2 +- 5 files changed, 90 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/ql/src/test/resources/mapping-multi-field-with-nested.json b/x-pack/plugin/ql/src/test/resources/mapping-multi-field-with-nested.json index 514a7a8082b2b..907049a600545 100644 --- a/x-pack/plugin/ql/src/test/resources/mapping-multi-field-with-nested.json +++ b/x-pack/plugin/ql/src/test/resources/mapping-multi-field-with-nested.json @@ -8,6 +8,8 @@ "date" : { "type" : "date" }, "date_nanos" : { "type" : "date_nanos" }, "shape": { "type" : "geo_shape" }, + "binary": {"type" : "binary", "doc_values": false }, + "binary_stored": {"type" : "binary", "doc_values": true }, "x" : { "type" : "text", "fields" : { diff --git a/x-pack/plugin/sql/qa/server/src/main/java/org/elasticsearch/xpack/sql/qa/rest/RestSqlTestCase.java b/x-pack/plugin/sql/qa/server/src/main/java/org/elasticsearch/xpack/sql/qa/rest/RestSqlTestCase.java index 99803dd264479..8d52d969ff0c5 100644 --- a/x-pack/plugin/sql/qa/server/src/main/java/org/elasticsearch/xpack/sql/qa/rest/RestSqlTestCase.java +++ b/x-pack/plugin/sql/qa/server/src/main/java/org/elasticsearch/xpack/sql/qa/rest/RestSqlTestCase.java @@ -952,6 +952,55 @@ public void testNextPageTSV() throws IOException { executeQueryWithNextPage("text/tab-separated-values", "text\tnumber\tsum\n", "%s\t%d\t%d\n"); } + public void testBinaryFieldFiltering() throws IOException { + XContentBuilder createIndex = JsonXContent.contentBuilder().startObject(); + createIndex.startObject("mappings"); + { + createIndex.startObject("properties"); + { + createIndex.startObject("id").field("type", "long").endObject(); + createIndex.startObject("binary").field("type", "binary").field("doc_values", true).endObject(); + } + createIndex.endObject(); + } + createIndex.endObject().endObject(); + + Request request = new Request("PUT", "/test_binary"); + request.setJsonEntity(Strings.toString(createIndex)); + assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + + long nonNullId = randomLong(); + long nullId = randomLong(); + StringBuilder bulk = new StringBuilder(); + bulk.append("{").append(toJson("index")).append(":{}}\n"); + bulk.append("{"); + { + bulk.append(toJson("id")).append(":").append(nonNullId); + bulk.append(","); + bulk.append(toJson("binary")).append(":").append(toJson("U29tZSBiaW5hcnkgYmxvYg==")); + } + bulk.append("}\n"); + bulk.append("{").append(toJson("index")).append(":{}}\n"); + bulk.append("{"); + { + bulk.append(toJson("id")).append(":").append(nullId); + } + bulk.append("}\n"); + + request = new Request("PUT", "/test_binary/_bulk?refresh=true"); + request.setJsonEntity(bulk.toString()); + assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + + String mode = randomMode(); + Map expected = new HashMap<>(); + expected.put("columns", singletonList(columnInfo(mode, "id", "long", JDBCType.BIGINT, 20))); + expected.put("rows", singletonList(singletonList(nonNullId))); + assertResponse(expected, runSql(mode, "SELECT id FROM test_binary WHERE binary IS NOT NULL", false)); + + expected.put("rows", singletonList(singletonList(nullId))); + assertResponse(expected, runSql(mode, "SELECT id FROM test_binary WHERE binary IS NULL", false)); + } + private void executeQueryWithNextPage(String format, String expectedHeader, String expectedLineFormat) throws IOException { int size = 20; String[] docs = new String[size]; diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java index e449fb488aa99..ce2759deb7a36 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java @@ -6,6 +6,7 @@ */ package org.elasticsearch.xpack.sql.analysis.analyzer; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.xpack.ql.capabilities.Unresolvable; import org.elasticsearch.xpack.ql.common.Failure; import org.elasticsearch.xpack.ql.expression.Alias; @@ -73,6 +74,7 @@ import static java.util.stream.Collectors.toMap; import static org.elasticsearch.xpack.ql.analyzer.VerifierChecks.checkFilterConditionType; import static org.elasticsearch.xpack.ql.common.Failure.fail; +import static org.elasticsearch.xpack.ql.type.DataTypes.BINARY; import static org.elasticsearch.xpack.ql.util.CollectionUtils.combine; import static org.elasticsearch.xpack.sql.stats.FeatureMetric.COMMAND; import static org.elasticsearch.xpack.sql.stats.FeatureMetric.GROUPBY; @@ -221,6 +223,7 @@ Collection verify(LogicalPlan plan) { checkPivot(p, localFailures, attributeRefs); checkMatrixStats(p, localFailures); checkCastOnInexact(p, localFailures); + checkBinaryHasDocValues(p, localFailures); // everything checks out // mark the plan as analyzed @@ -883,4 +886,21 @@ private static void checkCastOnInexact(LogicalPlan p, Set localFailures } })); } + + // check that any binary field used in WHERE, GROUP BY, HAVING or ORDER BY has doc_values, for ES to allow querying it + private static void checkBinaryHasDocValues(LogicalPlan plan, Set localFailures) { + List> fields = new ArrayList<>(); + + plan.forEachDown(Filter.class, e -> e.condition().forEachDown(FieldAttribute.class, + f -> fields.add(Tuple.tuple(f, "for filtering")))); + plan.forEachDown(Aggregate.class, e -> e.groupings().forEach(g -> g.forEachDown(FieldAttribute.class, + f -> fields.add(Tuple.tuple(f, "in aggregations"))))); + plan.forEachDown(OrderBy.class, e -> e.order().forEach(o -> o.child().forEachDown(FieldAttribute.class, + f -> fields.add(Tuple.tuple(f, "for ordering"))))); + + fields.stream().filter(t -> t.v1().dataType() == BINARY && t.v1().field().isAggregatable() == false).forEach(t -> { + localFailures.add(fail(t.v1(), "Binary field [" + t.v1().name() + "] cannot be used " + t.v2() + " unless it has the " + + "doc_values setting enabled")); + }); + } } diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java index f631bca4cbd4e..27c6736f5ef86 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java @@ -1224,4 +1224,22 @@ public void testCastOnInexact() { assertEquals("1:36: [text] of data type [text] cannot be used for [CAST()] inside the WHERE clause", error("SELECT * FROM test WHERE NOT (CAST(text AS string) = 'foo') OR true")); } + + public void testBinaryFieldWithDocValues() { + accept("SELECT binary_stored FROM test WHERE binary_stored IS NOT NULL"); + accept("SELECT binary_stored FROM test GROUP BY binary_stored HAVING count(binary_stored) > 1"); + accept("SELECT count(binary_stored) FROM test HAVING count(binary_stored) > 1"); + accept("SELECT binary_stored FROM test ORDER BY binary_stored"); + } + + public void testBinaryFieldWithNoDocValues() { + assertEquals("1:31: Binary field [binary] cannot be used for filtering unless it has the doc_values setting enabled", + error("SELECT binary FROM test WHERE binary IS NOT NULL")); + assertEquals("1:34: Binary field [binary] cannot be used in aggregations unless it has the doc_values setting enabled", + error("SELECT binary FROM test GROUP BY binary")); + assertEquals("1:45: Binary field [binary] cannot be used for filtering unless it has the doc_values setting enabled", + error("SELECT count(binary) FROM test HAVING count(binary) > 1")); + assertEquals("1:34: Binary field [binary] cannot be used for ordering unless it has the doc_values setting enabled", + error("SELECT binary FROM test ORDER BY binary")); + } } diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/plan/logical/command/sys/SysColumnsTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/plan/logical/command/sys/SysColumnsTests.java index c62519d237bb8..633e641b6001b 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/plan/logical/command/sys/SysColumnsTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/plan/logical/command/sys/SysColumnsTests.java @@ -55,7 +55,7 @@ public class SysColumnsTests extends ESTestCase { private static final String CLUSTER_NAME = "cluster"; private static final Map MAPPING1 = loadMapping("mapping-multi-field-with-nested.json", true); private static final Map MAPPING2 = loadMapping("mapping-multi-field-variation.json", true); - private static final int FIELD_COUNT1 = 16; + private static final int FIELD_COUNT1 = 18; private static final int FIELD_COUNT2 = 17; private final SqlParser parser = new SqlParser();