Skip to content

Commit

Permalink
SQL: Verify binary fields found in non-project to have the doc_values (
Browse files Browse the repository at this point in the history
…#69128) (#69257)

This adds a verifier rule to check that any fields used in filtering,
aggregations or ordering has the doc_values. Otherwise the query will
either fail in ES with a less obvious and more verbose reason OR plainly
give wrong results if filtering with `IS [NOT] NULL`.

(cherry picked from commit 1b9ec88)
  • Loading branch information
bpintea authored Feb 19, 2021
1 parent 0b3ce96 commit c8d7883
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
"date" : { "type" : "date" },
"date_nanos" : { "type" : "date_nanos" },
"shape": { "type" : "geo_shape" },
"binary": {"type" : "binary", "doc_values": false },
"binary_stored": {"type" : "binary", "doc_values": true },
"x" : {
"type" : "text",
"fields" : {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,55 @@ public void testNextPageTSV() throws IOException {
executeQueryWithNextPage("text/tab-separated-values", "text\tnumber\tsum\n", "%s\t%d\t%d\n");
}

public void testBinaryFieldFiltering() throws IOException {
XContentBuilder createIndex = JsonXContent.contentBuilder().startObject();
createIndex.startObject("mappings");
{
createIndex.startObject("properties");
{
createIndex.startObject("id").field("type", "long").endObject();
createIndex.startObject("binary").field("type", "binary").field("doc_values", true).endObject();
}
createIndex.endObject();
}
createIndex.endObject().endObject();

Request request = new Request("PUT", "/test_binary");
request.setJsonEntity(Strings.toString(createIndex));
assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode());

long nonNullId = randomLong();
long nullId = randomLong();
StringBuilder bulk = new StringBuilder();
bulk.append("{").append(toJson("index")).append(":{}}\n");
bulk.append("{");
{
bulk.append(toJson("id")).append(":").append(nonNullId);
bulk.append(",");
bulk.append(toJson("binary")).append(":").append(toJson("U29tZSBiaW5hcnkgYmxvYg=="));
}
bulk.append("}\n");
bulk.append("{").append(toJson("index")).append(":{}}\n");
bulk.append("{");
{
bulk.append(toJson("id")).append(":").append(nullId);
}
bulk.append("}\n");

request = new Request("PUT", "/test_binary/_bulk?refresh=true");
request.setJsonEntity(bulk.toString());
assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode());

String mode = randomMode();
Map<String, Object> expected = new HashMap<>();
expected.put("columns", singletonList(columnInfo(mode, "id", "long", JDBCType.BIGINT, 20)));
expected.put("rows", singletonList(singletonList(nonNullId)));
assertResponse(expected, runSql(mode, "SELECT id FROM test_binary WHERE binary IS NOT NULL", false));

expected.put("rows", singletonList(singletonList(nullId)));
assertResponse(expected, runSql(mode, "SELECT id FROM test_binary WHERE binary IS NULL", false));
}

private void executeQueryWithNextPage(String format, String expectedHeader, String expectedLineFormat) throws IOException {
int size = 20;
String[] docs = new String[size];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*/
package org.elasticsearch.xpack.sql.analysis.analyzer;

import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.xpack.ql.capabilities.Unresolvable;
import org.elasticsearch.xpack.ql.common.Failure;
import org.elasticsearch.xpack.ql.expression.Alias;
Expand Down Expand Up @@ -73,6 +74,7 @@
import static java.util.stream.Collectors.toMap;
import static org.elasticsearch.xpack.ql.analyzer.VerifierChecks.checkFilterConditionType;
import static org.elasticsearch.xpack.ql.common.Failure.fail;
import static org.elasticsearch.xpack.ql.type.DataTypes.BINARY;
import static org.elasticsearch.xpack.ql.util.CollectionUtils.combine;
import static org.elasticsearch.xpack.sql.stats.FeatureMetric.COMMAND;
import static org.elasticsearch.xpack.sql.stats.FeatureMetric.GROUPBY;
Expand Down Expand Up @@ -221,6 +223,7 @@ Collection<Failure> verify(LogicalPlan plan) {
checkPivot(p, localFailures, attributeRefs);
checkMatrixStats(p, localFailures);
checkCastOnInexact(p, localFailures);
checkBinaryHasDocValues(p, localFailures);

// everything checks out
// mark the plan as analyzed
Expand Down Expand Up @@ -883,4 +886,21 @@ private static void checkCastOnInexact(LogicalPlan p, Set<Failure> localFailures
}
}));
}

// check that any binary field used in WHERE, GROUP BY, HAVING or ORDER BY has doc_values, for ES to allow querying it
private static void checkBinaryHasDocValues(LogicalPlan plan, Set<Failure> localFailures) {
List<Tuple<FieldAttribute, String>> fields = new ArrayList<>();

plan.forEachDown(Filter.class, e -> e.condition().forEachDown(FieldAttribute.class,
f -> fields.add(Tuple.tuple(f, "for filtering"))));
plan.forEachDown(Aggregate.class, e -> e.groupings().forEach(g -> g.forEachDown(FieldAttribute.class,
f -> fields.add(Tuple.tuple(f, "in aggregations")))));
plan.forEachDown(OrderBy.class, e -> e.order().forEach(o -> o.child().forEachDown(FieldAttribute.class,
f -> fields.add(Tuple.tuple(f, "for ordering")))));

fields.stream().filter(t -> t.v1().dataType() == BINARY && t.v1().field().isAggregatable() == false).forEach(t -> {
localFailures.add(fail(t.v1(), "Binary field [" + t.v1().name() + "] cannot be used " + t.v2() + " unless it has the "
+ "doc_values setting enabled"));
});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1224,4 +1224,22 @@ public void testCastOnInexact() {
assertEquals("1:36: [text] of data type [text] cannot be used for [CAST()] inside the WHERE clause",
error("SELECT * FROM test WHERE NOT (CAST(text AS string) = 'foo') OR true"));
}

public void testBinaryFieldWithDocValues() {
accept("SELECT binary_stored FROM test WHERE binary_stored IS NOT NULL");
accept("SELECT binary_stored FROM test GROUP BY binary_stored HAVING count(binary_stored) > 1");
accept("SELECT count(binary_stored) FROM test HAVING count(binary_stored) > 1");
accept("SELECT binary_stored FROM test ORDER BY binary_stored");
}

public void testBinaryFieldWithNoDocValues() {
assertEquals("1:31: Binary field [binary] cannot be used for filtering unless it has the doc_values setting enabled",
error("SELECT binary FROM test WHERE binary IS NOT NULL"));
assertEquals("1:34: Binary field [binary] cannot be used in aggregations unless it has the doc_values setting enabled",
error("SELECT binary FROM test GROUP BY binary"));
assertEquals("1:45: Binary field [binary] cannot be used for filtering unless it has the doc_values setting enabled",
error("SELECT count(binary) FROM test HAVING count(binary) > 1"));
assertEquals("1:34: Binary field [binary] cannot be used for ordering unless it has the doc_values setting enabled",
error("SELECT binary FROM test ORDER BY binary"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public class SysColumnsTests extends ESTestCase {
private static final String CLUSTER_NAME = "cluster";
private static final Map<String, EsField> MAPPING1 = loadMapping("mapping-multi-field-with-nested.json", true);
private static final Map<String, EsField> MAPPING2 = loadMapping("mapping-multi-field-variation.json", true);
private static final int FIELD_COUNT1 = 16;
private static final int FIELD_COUNT1 = 18;
private static final int FIELD_COUNT2 = 17;

private final SqlParser parser = new SqlParser();
Expand Down

0 comments on commit c8d7883

Please sign in to comment.