SQL: Verify binary fields found in non-project to have the doc_values (…

…elastic#69128) This adds a verifier rule to check that any fields used in filtering, aggregations or ordering has the doc_values. Otherwise the query will either fail in ES with a less obvious and more verbose reason OR plainly give wrong results if filtering with `IS [NOT] NULL`. (cherry picked from commit 1b9ec88)
bpintea · Feb 19, 2021 · 98addbe · 98addbe
1 parent 0b3ce96
commit 98addbe
Show file tree

Hide file tree

Showing 5 changed files with 90 additions and 1 deletion.
diff --git a/x-pack/plugin/ql/src/test/resources/mapping-multi-field-with-nested.json b/x-pack/plugin/ql/src/test/resources/mapping-multi-field-with-nested.json
@@ -8,6 +8,8 @@
         "date" : { "type" : "date" },
         "date_nanos" : { "type" : "date_nanos" },
         "shape": { "type" : "geo_shape" },
+        "binary": {"type" : "binary", "doc_values": false },
+        "binary_stored": {"type" : "binary", "doc_values": true },
         "x" : {
             "type" : "text",
             "fields" : {

diff --git a/...ugin/sql/qa/server/src/main/java/org/elasticsearch/xpack/sql/qa/rest/RestSqlTestCase.java b/...ugin/sql/qa/server/src/main/java/org/elasticsearch/xpack/sql/qa/rest/RestSqlTestCase.java
@@ -952,6 +952,55 @@ public void testNextPageTSV() throws IOException {
         executeQueryWithNextPage("text/tab-separated-values", "text\tnumber\tsum\n", "%s\t%d\t%d\n");
     }
 
+    public void testBinaryFieldFiltering() throws IOException {
+        XContentBuilder createIndex = JsonXContent.contentBuilder().startObject();
+        createIndex.startObject("mappings");
+        {
+            createIndex.startObject("properties");
+            {
+                createIndex.startObject("id").field("type", "long").endObject();
+                createIndex.startObject("binary").field("type", "binary").field("doc_values", true).endObject();
+            }
+            createIndex.endObject();
+        }
+        createIndex.endObject().endObject();
+
+        Request request = new Request("PUT", "/test_binary");
+        request.setJsonEntity(Strings.toString(createIndex));
+        assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode());
+
+        long nonNullId = randomLong();
+        long nullId = randomLong();
+        StringBuilder bulk = new StringBuilder();
+        bulk.append("{").append(toJson("index")).append(":{}}\n");
+        bulk.append("{");
+        {
+            bulk.append(toJson("id")).append(":").append(nonNullId);
+            bulk.append(",");
+            bulk.append(toJson("binary")).append(":").append(toJson("U29tZSBiaW5hcnkgYmxvYg=="));
+        }
+        bulk.append("}\n");
+        bulk.append("{").append(toJson("index")).append(":{}}\n");
+        bulk.append("{");
+        {
+            bulk.append(toJson("id")).append(":").append(nullId);
+        }
+        bulk.append("}\n");
+
+        request = new Request("PUT", "/test_binary/_bulk?refresh=true");
+        request.setJsonEntity(bulk.toString());
+        assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode());
+
+        String mode = randomMode();
+        Map<String, Object> expected = new HashMap<>();
+        expected.put("columns", singletonList(columnInfo(mode, "id", "long", JDBCType.BIGINT, 20)));
+        expected.put("rows", singletonList(singletonList(nonNullId)));
+        assertResponse(expected, runSql(mode, "SELECT id FROM test_binary WHERE binary IS NOT NULL", false));
+
+        expected.put("rows", singletonList(singletonList(nullId)));
+        assertResponse(expected, runSql(mode, "SELECT id FROM test_binary WHERE binary IS NULL", false));
+    }
+
     private void executeQueryWithNextPage(String format, String expectedHeader, String expectedLineFormat) throws IOException {
         int size = 20;
         String[] docs = new String[size];

diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java
@@ -6,6 +6,7 @@
  */
 package org.elasticsearch.xpack.sql.analysis.analyzer;
 
+import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.xpack.ql.capabilities.Unresolvable;
 import org.elasticsearch.xpack.ql.common.Failure;
 import org.elasticsearch.xpack.ql.expression.Alias;
@@ -73,6 +74,7 @@
 import static java.util.stream.Collectors.toMap;
 import static org.elasticsearch.xpack.ql.analyzer.VerifierChecks.checkFilterConditionType;
 import static org.elasticsearch.xpack.ql.common.Failure.fail;
+import static org.elasticsearch.xpack.ql.type.DataTypes.BINARY;
 import static org.elasticsearch.xpack.ql.util.CollectionUtils.combine;
 import static org.elasticsearch.xpack.sql.stats.FeatureMetric.COMMAND;
 import static org.elasticsearch.xpack.sql.stats.FeatureMetric.GROUPBY;
@@ -221,6 +223,7 @@ Collection<Failure> verify(LogicalPlan plan) {
                 checkPivot(p, localFailures, attributeRefs);
                 checkMatrixStats(p, localFailures);
                 checkCastOnInexact(p, localFailures);
+                checkBinaryHasDocValues(p, localFailures);
 
                 // everything checks out
                 // mark the plan as analyzed
@@ -883,4 +886,21 @@ private static void checkCastOnInexact(LogicalPlan p, Set<Failure> localFailures
             }
         }));
     }
+
+    // check that any binary field used in WHERE, GROUP BY, HAVING or ORDER BY has doc_values, for ES to allow querying it
+    private static void checkBinaryHasDocValues(LogicalPlan plan, Set<Failure> localFailures) {
+        List<Tuple<FieldAttribute, String>> fields = new ArrayList<>();
+
+        plan.forEachDown(Filter.class, e -> e.condition().forEachDown(FieldAttribute.class,
+            f -> fields.add(Tuple.tuple(f, "for filtering"))));
+        plan.forEachDown(Aggregate.class, e -> e.groupings().forEach(g -> g.forEachDown(FieldAttribute.class,
+            f -> fields.add(Tuple.tuple(f, "in aggregations")))));
+        plan.forEachDown(OrderBy.class, e -> e.order().forEach(o -> o.child().forEachDown(FieldAttribute.class,
+            f -> fields.add(Tuple.tuple(f, "for ordering")))));
+
+        fields.stream().filter(t -> t.v1().dataType() == BINARY && t.v1().field().isAggregatable() == false).forEach(t -> {
+            localFailures.add(fail(t.v1(), "Binary field [" + t.v1().name() + "] cannot be used " + t.v2() + " unless it has the "
+                + "doc_values setting enabled"));
+        });
+    }
 }
diff --git a/...c/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java b/...c/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java
@@ -1224,4 +1224,22 @@ public void testCastOnInexact() {
         assertEquals("1:36: [text] of data type [text] cannot be used for [CAST()] inside the WHERE clause",
                 error("SELECT * FROM test WHERE NOT (CAST(text AS string) = 'foo') OR true"));
     }
+
+    public void testBinaryFieldWithDocValues() {
+        accept("SELECT binary_stored FROM test WHERE binary_stored IS NOT NULL");
+        accept("SELECT binary_stored FROM test GROUP BY binary_stored HAVING count(binary_stored) > 1");
+        accept("SELECT count(binary_stored) FROM test HAVING count(binary_stored) > 1");
+        accept("SELECT binary_stored FROM test ORDER BY binary_stored");
+    }
+
+    public void testBinaryFieldWithNoDocValues() {
+        assertEquals("1:31: Binary field [binary] cannot be used for filtering unless it has the doc_values setting enabled",
+            error("SELECT binary FROM test WHERE binary IS NOT NULL"));
+        assertEquals("1:34: Binary field [binary] cannot be used in aggregations unless it has the doc_values setting enabled",
+            error("SELECT binary FROM test GROUP BY binary"));
+        assertEquals("1:45: Binary field [binary] cannot be used for filtering unless it has the doc_values setting enabled",
+            error("SELECT count(binary) FROM test HAVING count(binary) > 1"));
+        assertEquals("1:34: Binary field [binary] cannot be used for ordering unless it has the doc_values setting enabled",
+            error("SELECT binary FROM test ORDER BY binary"));
+    }
 }
diff --git a/...l/src/test/java/org/elasticsearch/xpack/sql/plan/logical/command/sys/SysColumnsTests.java b/...l/src/test/java/org/elasticsearch/xpack/sql/plan/logical/command/sys/SysColumnsTests.java
@@ -55,7 +55,7 @@ public class SysColumnsTests extends ESTestCase {
     private static final String CLUSTER_NAME = "cluster";
     private static final Map<String, EsField> MAPPING1 = loadMapping("mapping-multi-field-with-nested.json", true);
     private static final Map<String, EsField> MAPPING2 = loadMapping("mapping-multi-field-variation.json", true);
-    private static final int FIELD_COUNT1 = 16;
+    private static final int FIELD_COUNT1 = 18;
     private static final int FIELD_COUNT2 = 17;
 
     private final SqlParser parser = new SqlParser();