Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ES|QL] Combine Disjunctive CIDRMatch #111501

Merged
merged 11 commits into from
Aug 8, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,12 @@ private static Query boolQuery(Source source, Query left, Query right, boolean i
}
List<Query> queries;
// check if either side is already a bool query to an extra bool query
if (left instanceof BoolQuery bool && bool.isAnd() == isAnd) {
queries = CollectionUtils.combine(bool.queries(), right);
if (left instanceof BoolQuery leftBool && leftBool.isAnd() == isAnd) {
if (right instanceof BoolQuery rightBool && rightBool.isAnd() == isAnd) {
queries = CollectionUtils.combine(leftBool.queries(), rightBool.queries());
} else {
queries = CollectionUtils.combine(leftBool.queries(), right);
}
} else if (right instanceof BoolQuery bool && bool.isAnd() == isAnd) {
queries = CollectionUtils.combine(bool.queries(), left);
} else {
Expand Down
60 changes: 55 additions & 5 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/ip.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -282,18 +282,18 @@ str1:keyword |str2:keyword |ip1:ip |ip2:ip
// end::to_ip-result[]
;

cdirMatchOrs
cdirMatchOrsIPs
required_capability: combine_disjunctive_cidrmatches
fang-xing-esql marked this conversation as resolved.
Show resolved Hide resolved

FROM hosts
| WHERE CIDR_MATCH(ip1, "127.0.0.2/32") or CIDR_MATCH(ip1, "127.0.0.3/32") or CIDR_MATCH(ip0, "127.0.0.1") or CIDR_MATCH(ip0, "fe80::cae2:65ff:fece:feb9")
FROM hosts
| WHERE CIDR_MATCH(ip1, "127.0.0.2/32") or CIDR_MATCH(ip0, "127.0.0.1") or CIDR_MATCH(ip1, "127.0.0.3/32") or CIDR_MATCH(ip0, "fe80::cae2:65ff:fece:feb9")
| KEEP card, host, ip0, ip1
| sort host, card, ip0, ip1
;
warning:Line 2:20: evaluation of [ip1] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:20: java.lang.IllegalArgumentException: single-value function encountered multi-value
warning:Line 2:90: evaluation of [ip0] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:90: java.lang.IllegalArgumentException: single-value function encountered multi-value
warning:Line 2:55: evaluation of [ip0] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:55: java.lang.IllegalArgumentException: single-value function encountered multi-value

card:keyword |host:keyword |ip0:ip |ip1:ip
eth0 |alpha |127.0.0.1 |127.0.0.1
Expand All @@ -304,6 +304,56 @@ eth0 |gamma |fe80::cae2:65ff:fece:feb9|127.0.0.3
lo0 |gamma |fe80::cae2:65ff:fece:feb9|fe81::cae2:65ff:fece:feb9
;

cdirMatchEqualsInsOrs
required_capability: combine_disjunctive_cidrmatches

FROM hosts
| WHERE host == "alpha" OR host == "gamma" OR CIDR_MATCH(ip1, "127.0.0.2/32") OR CIDR_MATCH(ip0, "127.0.0.1") OR card IN ("eth0", "eth1") OR CIDR_MATCH(ip1, "127.0.0.3/32") OR card == "lo0" OR CIDR_MATCH(ip0, "fe80::cae2:65ff:fece:feb9") OR host == "beta"
| KEEP card, host, ip0, ip1
| sort host, card, ip0, ip1
;
warning:Line 2:58: evaluation of [ip1] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:58: java.lang.IllegalArgumentException: single-value function encountered multi-value
warning:Line 2:93: evaluation of [ip0] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:93: java.lang.IllegalArgumentException: single-value function encountered multi-value

card:keyword |host:keyword |ip0:ip |ip1:ip
eth0 |alpha |127.0.0.1 |127.0.0.1
eth1 |alpha |::1 |::1
eth0 |beta |127.0.0.1 |::1
eth1 |beta |127.0.0.1 |127.0.0.2
eth1 |beta |127.0.0.1 |128.0.0.1
eth0 |epsilon |[fe80::cae2:65ff:fece:feb9, fe80::cae2:65ff:fece:fec0, fe80::cae2:65ff:fece:fec1] |fe80::cae2:65ff:fece:fec1
eth1 |epsilon |null |[127.0.0.1, 127.0.0.2, 127.0.0.3]
eth0 |gamma |fe80::cae2:65ff:fece:feb9 |127.0.0.3
lo0 |gamma |fe80::cae2:65ff:fece:feb9 |fe81::cae2:65ff:fece:feb9
;

cdirMatchEqualsInsOrsIPs
required_capability: combine_disjunctive_cidrmatches

FROM hosts
| WHERE host == "alpha" OR host == "gamma" OR CIDR_MATCH(ip1, "127.0.0.2/32") OR ip0 == "127.0.0.1" OR card IN ("eth0", "eth1") OR ip1 IN ("127.0.0.3", "127.0.0.1") OR card == "lo0" OR CIDR_MATCH(ip0, "fe80::cae2:65ff:fece:feb9") OR host == "beta"
| KEEP card, host, ip0, ip1
| sort host, card, ip0, ip1
;
warning:Line 2:58: evaluation of [ip1] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:58: java.lang.IllegalArgumentException: single-value function encountered multi-value
warning:Line 2:82: evaluation of [ip0] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:82: java.lang.IllegalArgumentException: single-value function encountered multi-value

card:keyword |host:keyword |ip0:ip |ip1:ip
eth0 |alpha |127.0.0.1 |127.0.0.1
eth1 |alpha |::1 |::1
eth0 |beta |127.0.0.1 |::1
eth1 |beta |127.0.0.1 |127.0.0.2
eth1 |beta |127.0.0.1 |128.0.0.1
eth0 |epsilon |[fe80::cae2:65ff:fece:feb9, fe80::cae2:65ff:fece:fec0, fe80::cae2:65ff:fece:fec1] |fe80::cae2:65ff:fece:fec1
eth1 |epsilon |null |[127.0.0.1, 127.0.0.2, 127.0.0.3]
eth0 |gamma |fe80::cae2:65ff:fece:feb9 |127.0.0.3
lo0 |gamma |fe80::cae2:65ff:fece:feb9 |fe81::cae2:65ff:fece:feb9
;

pushDownIP
from hosts | where ip1 == to_ip("::1") | keep card, host, ip0, ip1;
ignoreOrder:true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,11 @@ public enum Cap {
*/
COMBINE_BINARY_COMPARISONS,

/**
* MATCH command support
*/
MATCH_COMMAND(true),

/**
* Support CIDRMatch in CombineDisjunctives rule.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import org.elasticsearch.xpack.esql.optimizer.rules.BooleanFunctionEqualsElimination;
import org.elasticsearch.xpack.esql.optimizer.rules.BooleanSimplification;
import org.elasticsearch.xpack.esql.optimizer.rules.CombineBinaryComparisons;
import org.elasticsearch.xpack.esql.optimizer.rules.CombineDisjunctionsToIn;
import org.elasticsearch.xpack.esql.optimizer.rules.CombineDisjunctions;
import org.elasticsearch.xpack.esql.optimizer.rules.CombineEvals;
import org.elasticsearch.xpack.esql.optimizer.rules.CombineProjections;
import org.elasticsearch.xpack.esql.optimizer.rules.ConstantFolding;
Expand Down Expand Up @@ -209,7 +209,7 @@ protected static Batch<LogicalPlan> operators() {
new PropagateNullable(),
new BooleanFunctionEqualsElimination(),
new CombineBinaryComparisons(),
new CombineDisjunctionsToIn(),
new CombineDisjunctions(),
new SimplifyComparisonsArithmetics(DataType::areCompatible),
// prune/elimination
new PruneFilters(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@

package org.elasticsearch.xpack.esql.optimizer.rules;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.expression.predicate.logical.Or;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.expression.function.scalar.ip.CIDRMatch;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In;
Expand All @@ -24,79 +28,107 @@

import static org.elasticsearch.xpack.esql.core.expression.predicate.Predicates.combineOr;
import static org.elasticsearch.xpack.esql.core.expression.predicate.Predicates.splitOr;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.ipToString;

/**
* Combine disjunctions on the same field into an In expression.
* Combine disjunctive Equals or In expression on the same field into an In expression.
* This rule looks for both simple equalities:
* 1. a == 1 OR a == 2 becomes a IN (1, 2)
* and combinations of In
* 2. a == 1 OR a IN (2) becomes a IN (1, 2)
* 3. a IN (1) OR a IN (2) becomes a IN (1, 2)
* Combine disjunctive Equals, In or CIDRMatch expressions on the same field into a CIDRMatch expression.
fang-xing-esql marked this conversation as resolved.
Show resolved Hide resolved
* 4. CIDRMatch(a, ip1) OR CIDRMatch(a, ip2) OR a = ip3 or a IN (ip4, ip5) becomes CIDRMatch(a, ip1, ip2, ip3, ip4, ip5)
fang-xing-esql marked this conversation as resolved.
Show resolved Hide resolved
* <p>
* This rule does NOT check for type compatibility as that phase has been
* already be verified in the analyzer.
*/
public final class CombineDisjunctionsToIn extends OptimizerRules.OptimizerExpressionRule<Or> {
public CombineDisjunctionsToIn() {
public final class CombineDisjunctions extends OptimizerRules.OptimizerExpressionRule<Or> {
public CombineDisjunctions() {
super(OptimizerRules.TransformDirection.UP);
}

protected In createIn(Expression key, List<Expression> values, ZoneId zoneId) {
protected static In createIn(Expression key, List<Expression> values, ZoneId zoneId) {
return new In(key.source(), key, values);
}

protected Equals createEquals(Expression k, Set<Expression> v, ZoneId finalZoneId) {
protected static Equals createEquals(Expression k, Set<Expression> v, ZoneId finalZoneId) {
return new Equals(k.source(), k, v.iterator().next(), finalZoneId);
}

protected CIDRMatch createCIDRMatch(Expression k, List<Expression> v) {
protected static CIDRMatch createCIDRMatch(Expression k, List<Expression> v) {
return new CIDRMatch(k.source(), k, v);
}

@Override
public Expression rule(Or or) {
Expression e = or;
// look only at equals and In
// look only at equals, In and CIDRMatch
List<Expression> exps = splitOr(e);

Map<Expression, Set<Expression>> found = new LinkedHashMap<>();
Map<Expression, Set<Expression>> cIDRMatch = new LinkedHashMap<>();
Map<Expression, Set<Expression>> ins = new LinkedHashMap<>();
fang-xing-esql marked this conversation as resolved.
Show resolved Hide resolved
Map<Expression, Set<Expression>> cidrs = new LinkedHashMap<>();
Map<Expression, Set<Expression>> ips = new LinkedHashMap<>();
ZoneId zoneId = null;
List<Expression> ors = new LinkedList<>();
boolean changed = false;

for (Expression exp : exps) {
if (exp instanceof Equals eq) {
// consider only equals against foldables
if (eq.right().foldable()) {
found.computeIfAbsent(eq.left(), k -> new LinkedHashSet<>()).add(eq.right());
ins.computeIfAbsent(eq.left(), k -> new LinkedHashSet<>()).add(eq.right());
if (eq.left().dataType() == DataType.IP) {
Object value = eq.right().fold();
if (value instanceof BytesRef bytesRef) {
fang-xing-esql marked this conversation as resolved.
Show resolved Hide resolved
value = ipToString(bytesRef);
}
ips.computeIfAbsent(eq.left(), k -> new LinkedHashSet<>()).add(new Literal(Source.EMPTY, value, DataType.IP));
}
} else {
ors.add(exp);
}
if (zoneId == null) {
zoneId = eq.zoneId();
}
} else if (exp instanceof In in) {
found.computeIfAbsent(in.value(), k -> new LinkedHashSet<>()).addAll(in.list());
ins.computeIfAbsent(in.value(), k -> new LinkedHashSet<>()).addAll(in.list());
if (in.value().dataType() == DataType.IP) {
List<Expression> values = new ArrayList<>(in.list().size());
for (Expression i : in.list()) {
Object value = i.fold();
if (value instanceof BytesRef bytesRef) {
value = ipToString(bytesRef);
}
values.add(new Literal(Source.EMPTY, value, DataType.IP));
}
ips.computeIfAbsent(in.value(), k -> new LinkedHashSet<>()).addAll(values);
}
} else if (exp instanceof CIDRMatch cm) {
cIDRMatch.computeIfAbsent(cm.ipField(), k -> new LinkedHashSet<>()).addAll(cm.matches());
cidrs.computeIfAbsent(cm.ipField(), k -> new LinkedHashSet<>()).addAll(cm.matches());
} else {
ors.add(exp);
}
}

if (found.isEmpty() == false) {
if (cidrs.isEmpty() == false) {
for (Expression f : ips.keySet()) {
cidrs.computeIfAbsent(f, k -> new LinkedHashSet<>()).addAll(ips.get(f));
ins.remove(f);
}
}

if (ins.isEmpty() == false) {
// combine equals alongside the existing ors
final ZoneId finalZoneId = zoneId;
fang-xing-esql marked this conversation as resolved.
Show resolved Hide resolved
found.forEach(
ins.forEach(
(k, v) -> { ors.add(v.size() == 1 ? createEquals(k, v, finalZoneId) : createIn(k, new ArrayList<>(v), finalZoneId)); }
);

changed = true;
}

if (cIDRMatch.isEmpty() == false) {
cIDRMatch.forEach((k, v) -> { ors.add(createCIDRMatch(k, new ArrayList<>(v))); });
if (cidrs.isEmpty() == false) {
cidrs.forEach((k, v) -> { ors.add(createCIDRMatch(k, new ArrayList<>(v))); });
changed = true;
}

Expand Down
Loading