Skip to content

Commit

Permalink
Support reading ignored source as part of value source loading via block loaders.
Browse files Browse the repository at this point in the history

Currently, when the compute engine loads source and the source mode is synthetic, the synthetic source loader is already used. However, the `_ignored_source` field isn't always marked as a required stored field, so the loaded source can be missing many fields.

This change includes the `_ignored_source` field as a required stored field.

Long term, with synthetic source we should only load ignored source when a field has neither doc values nor a stored field, as is being explored in elastic#114886.
  • Loading branch information
martijnvg committed Oct 16, 2024
1 parent 1c0e292 commit 6295fa8
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -632,14 +632,7 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (hasDocValues()) {
return new BlockDocValuesReader.BytesRefsFromOrdsBlockLoader(name());
}
if (isSyntheticSource) {
if (false == isStored()) {
throw new IllegalStateException(
"keyword field ["
+ name()
+ "] is only supported in synthetic _source index if it creates doc values or stored fields"
);
}
if (isStored()) {
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
}
SourceValueFetcher fetcher = sourceValueFetcher(blContext.sourcePaths(name()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

package org.elasticsearch.search.fetch;

import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
Expand All @@ -33,7 +35,8 @@ public boolean noRequirements() {
/**
* Use when the source should be loaded; besides the ignored source field ({@link IgnoredSourceFieldMapper#NAME}), no other stored fields are required
*/
public static final StoredFieldsSpec NEEDS_SOURCE = new StoredFieldsSpec(true, false, Set.of());
//TODO: add NEEDS_IGNORE_SOURCE constant
public static final StoredFieldsSpec NEEDS_SOURCE = new StoredFieldsSpec(true, false, Set.of(IgnoredSourceFieldMapper.NAME));

/**
* Combine these stored field requirements with those from another StoredFieldsSpec
Expand Down
2 changes: 1 addition & 1 deletion x-pack/plugin/logsdb/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ base {

restResources {
restApi {
include 'bulk', 'search', '_common', 'indices', 'index', 'cluster', 'data_stream', 'ingest', 'cat', 'capabilities'
include 'bulk', 'search', '_common', 'indices', 'index', 'cluster', 'data_stream', 'ingest', 'cat', 'capabilities', 'esql.query'
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
---
# Creates the logsdb-mode index `my-index` and bulk indexes six documents into it (with refresh).
# Every mapped field except `@timestamp` and `host.name` sets `store: false`; all of those except
# the text field `message` also set `doc_values: false`.
setup:
- requires:
test_runner_features: allowed_warnings_regex

- do:
indices.create:
index: my-index
body:
settings:
index:
mode: logsdb
mappings:
properties:
"@timestamp":
type: date
host.name:
type: keyword
agent_id:
type: keyword
doc_values: false
store: false
process_id:
type: integer
doc_values: false
store: false
http_method:
type: keyword
doc_values: false
store: false
is_https:
type: boolean
doc_values: false
store: false
location:
type: geo_point
doc_values: false
store: false
message:
type: text
store: false

- do:
bulk:
index: my-index
refresh: true
body:
- { "index": { } }
- { "@timestamp": "2024-02-12T10:30:00Z", "host.name": "foo", "agent_id": "darth-vader", "process_id": 101, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "No, I am your father." }
- { "index": { } }
- { "@timestamp": "2024-02-12T10:31:00Z", "host.name": "bar", "agent_id": "yoda", "process_id": 102, "http_method": "PUT", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "Do. Or do not. There is no try." }
- { "index": { } }
- { "@timestamp": "2024-02-12T10:32:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 103, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "May the force be with you." }
- { "index": { } }
- { "@timestamp": "2024-02-12T10:33:00Z", "host.name": "baz", "agent_id": "darth-vader", "process_id": 102, "http_method": "POST", "is_https": true, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "I find your lack of faith disturbing." }
- { "index": { } }
- { "@timestamp": "2024-02-12T10:34:00Z", "host.name": "baz", "agent_id": "yoda", "process_id": 104, "http_method": "POST", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "Wars not make one great." }
- { "index": { } }
- { "@timestamp": "2024-02-12T10:35:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 105, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "That's no moon. It's a space station." }

---
# Deletes the `my-index` index after each test.
teardown:
- do:
indices.delete:
index: my-index

# The test below is commented out because column order varies between test runs.
#---
#"Simple from":
# - do:
# allowed_warnings_regex:
# - "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
# esql.query:
# body:
# query: 'FROM my-index | LIMIT 1'
#
# - match: {columns.0.name: "@timestamp"}
# - match: {columns.0.type: "date"}
# - match: {columns.1.name: "agent_id"}
# - match: {columns.1.type: "keyword"}
# - match: {columns.2.name: "host.name"}
# - match: {columns.2.type: "keyword"}
# - match: {columns.4.name: "is_https"}
# - match: {columns.4.type: "boolean"}
# - match: {columns.5.name: "http_method"}
# - match: {columns.5.type: "keyword"}
# - match: {columns.6.name: "location"}
# - match: {columns.6.type: "geo_point"}
# - match: {columns.7.name: "message"}
# - match: {columns.7.type: "text"}
# - match: {columns.8.name: "process_id"}
# - match: {columns.8.type: "integer"}
#
# - match: {values.0.0: "2024-02-12T10:31:00.000Z"}
# - match: {values.0.1: "yoda"}
# - match: {values.0.2: "bar"}
# - match: {values.0.3: "PUT"}
# - match: {values.0.4: "false"}
# - match: {values.0.5: "POINT (-74.006 40.7128)"}
# - match: {values.0.6: null} # null is expected, because text fields aren't stored in ignored source
# - match: {values.0.7: 102}

---
# Queries only the `location` column through ES|QL and checks its name and type.
# All six rows are expected to hold the same point, rendered as `POINT (lon lat)`.
"Simple from geo point":
- do:
esql.query:
body:
query: 'FROM my-index | KEEP location | LIMIT 10'

- match: {columns.0.name: "location"}
- match: {columns.0.type: "geo_point"}

- match: {values.0.0: "POINT (-74.006 40.7128)"}
- match: {values.1.0: "POINT (-74.006 40.7128)"}
- match: {values.2.0: "POINT (-74.006 40.7128)"}
- match: {values.3.0: "POINT (-74.006 40.7128)"}
- match: {values.4.0: "POINT (-74.006 40.7128)"}
- match: {values.5.0: "POINT (-74.006 40.7128)"}

---
# Queries only the `process_id` column through ES|QL and checks its name, type and six values.
# NOTE(review): the query has no SORT, so the positional value assertions depend on the order
# in which rows are returned — confirm that order is stable.
"Simple from number fields":
- do:
esql.query:
body:
query: 'FROM my-index | KEEP process_id | LIMIT 10'

- match: {columns.0.name: "process_id"}
- match: {columns.0.type: "integer"}

- match: {values.0.0: 102}
- match: {values.1.0: 102}
- match: {values.2.0: 104}
- match: {values.3.0: 101}
- match: {values.4.0: 103}
- match: {values.5.0: 105}

---
# Queries the `agent_id` and `http_method` columns through ES|QL and checks their names,
# types and the value pair of each of the six rows.
# NOTE(review): the query has no SORT, so the positional value assertions depend on the order
# in which rows are returned — confirm that order is stable.
"Simple from keyword fields":
- do:
esql.query:
body:
query: 'FROM my-index | KEEP agent_id, http_method | LIMIT 10'

- match: {columns.0.name: "agent_id"}
- match: {columns.0.type: "keyword"}
- match: {columns.1.name: "http_method"}
- match: {columns.1.type: "keyword"}

- match: {values.0.0: "yoda"}
- match: {values.0.1: "PUT"}
- match: {values.1.0: "darth-vader"}
- match: {values.1.1: "POST"}
- match: {values.2.0: "yoda"}
- match: {values.2.1: "POST"}
- match: {values.3.0: "darth-vader"}
- match: {values.3.1: "GET"}
- match: {values.4.0: "obi-wan"}
- match: {values.4.1: "GET"}
- match: {values.5.0: "obi-wan"}
- match: {values.5.1: "GET"}

---
# Queries only the `is_https` column through ES|QL and checks its name, type and six values.
# NOTE(review): the query has no SORT, so the position of the single `true` row depends on the
# order in which rows are returned — confirm that order is stable.
"Simple from boolean fields":
- do:
esql.query:
body:
query: 'FROM my-index | KEEP is_https | LIMIT 10'

- match: {columns.0.name: "is_https"}
- match: {columns.0.type: "boolean"}

- match: {values.0.0: false}
- match: {values.1.0: true}
- match: {values.2.0: false}
- match: {values.3.0: false}
- match: {values.4.0: false}
- match: {values.5.0: false}

---
# Queries only the `message` column through ES|QL and checks its name and type.
# Every row is expected to be null; the "cannot be retrieved" warning is explicitly allowed.
"Simple from text fields":
- do:
allowed_warnings_regex:
- "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
esql.query:
body:
query: 'FROM my-index | KEEP message | LIMIT 10'

- match: {columns.0.name: "message"}
- match: {columns.0.type: "text"}

# null is expected, because text fields aren't stored in ignored source
- match: {values.0.0: null}
- match: {values.1.0: null}
- match: {values.2.0: null}
- match: {values.3.0: null}
- match: {values.4.0: null}
- match: {values.5.0: null}

0 comments on commit 6295fa8

Please sign in to comment.