Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EQL: Filter out null join keys in sequence queries #78195

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -414,3 +414,30 @@ query = '''
process where substring(command_line, 5) regex (".*?net[1]? localgroup.*?", ".*? myappserver.py .*?")
'''


[[queries]]
name = "sequenceOnOneNullKey"
query = '''
sequence
[process where parent_process_path == null] by parent_process_path
[any where true] by parent_process_path
'''
expected_event_ids = []

[[queries]]
name = "sequenceOnTwoNullKeys"
query = '''
sequence by ppid
[process where parent_process_path == null] by parent_process_path
[any where true] by parent_process_path
'''
expected_event_ids = []

[[queries]]
name = "sequenceOnImplicitNullKeys"
query = '''
sequence by ppid, parent_process_path
[process where parent_process_path == null]
[any where true]
'''
expected_event_ids = []
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,19 @@
package org.elasticsearch.xpack.eql;

import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.apache.logging.log4j.core.config.plugins.util.PluginManager;
import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryRequest;
import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotRequest;
import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder.HttpClientConfigCallback;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.core.CountRequest;
import org.elasticsearch.common.logging.LogConfigurator;
Expand All @@ -34,9 +40,16 @@ public class EqlDataLoader {
public static void main(String[] args) throws IOException {
// Need to setup the log configuration properly to avoid messages when creating a new RestClient
PluginManager.addPackage(LogConfigurator.class.getPackage().getName());

final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials("admin", "admin-password"));
try (
RestClient client = RestClient.builder(new HttpHost("localhost", 9200))
.setHttpClientConfigCallback(new HttpClientConfigCallback() {
@Override
public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) {
return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
}
})
Comment on lines +47 to +52
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it makes it generally easier with testing, so good to have it, but curious if this was added for a specific reason.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, there is a better reason: since #70114 security is enabled in this test cluster.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you.
(teaching me a lesson about reading PRs chronologically...)

.setRequestConfigCallback(
requestConfigBuilder -> requestConfigBuilder.setConnectTimeout(30000000)
.setConnectionRequestTimeout(30000000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,8 @@ type = "sequence"

[[queries]]
queryNo = 24
count = 1
expected_event_ids = [2860083, 2860090, 2860098]
count = 0
expected_event_ids = []
filter_counts = [6, 6, 6]
filters = [
'security where hostname != "newyork" and event_id == 4625',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

package org.elasticsearch.xpack.eql.execution.assembler;

import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
Expand All @@ -22,7 +21,6 @@
import java.util.Set;

import static java.util.Collections.emptyList;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.existsQuery;
import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
Expand Down Expand Up @@ -58,6 +56,13 @@ public BoxedQueryRequest(QueryRequest original, String timestamp, List<String> k
timestampRange = rangeQuery(timestamp).timeZone("UTC").format("epoch_millis");
keys = keyNames;
RuntimeUtils.addFilter(timestampRange, searchSource);
// do not join on null values
if (keyNames.isEmpty() == false) {
for (String keyName : keyNames) {
// add an "exists" query for each join key to filter out any non-existent values
RuntimeUtils.addFilter(existsQuery(keyName), searchSource);
}
}
}

@Override
Expand Down Expand Up @@ -112,18 +117,9 @@ public BoxedQueryRequest keys(List<List<Object>> values) {
// iterate on all possible values for a given key
newFilters = new ArrayList<>(values.size());
for (int keyIndex = 0; keyIndex < keys.size(); keyIndex++) {

boolean hasNullValue = false;
Set<Object> keyValues = new HashSet<>(BoxedQueryRequest.MAX_TERMS);
// check the given keys but make sure to double check for
// null as it translates to a different query (missing/not exists)
for (List<Object> value : values) {
Object keyValue = value.get(keyIndex);
if (keyValue == null) {
hasNullValue = true;
} else {
keyValues.add(keyValue);
}
keyValues.add(value.get(keyIndex));
}

// too many unique terms, don't filter on the keys
Expand All @@ -141,21 +137,6 @@ public BoxedQueryRequest keys(List<List<Object>> values) {
} else if (keyValues.size() > 1) {
query = termsQuery(key, keyValues);
}

// if null values are present
// make an OR call - either terms or null/missing values
if (hasNullValue) {
BoolQueryBuilder isMissing = boolQuery().mustNot(existsQuery(key));
if (query != null) {
query = boolQuery()
// terms query
.should(query)
// is missing
.should(isMissing);
} else {
query = isMissing;
}
}
newFilters.add(query);
}
}
Expand Down