Skip to content

Commit

Permalink
Take into account text fields with keyword sub fields.
Browse files Browse the repository at this point in the history
If text fields are not stored, then their keyword sub fields
can be used to synthesize the value of the parent text field.
  • Loading branch information
martijnvg committed Oct 17, 2024
1 parent 2880403 commit c6e303e
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1012,12 +1012,14 @@ protected String delegatingTo() {
if (isStored()) {
return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(name());
}
if (isSyntheticSource) {
if (isSyntheticSource && syntheticSourceDelegate == null) {
/*
* When we're in synthetic source mode we don't currently
* support text fields that are not stored and are not children
* of perfect keyword fields. We'd have to load from the parent
* field and then convert the result to a string.
* field and then convert the result to a string. In this case,
* even if we would synthesize the source, the current field
* would be missing.
*/
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ setup:
message:
type: text
store: false
fields:
raw:
type: keyword

- do:
bulk:
Expand All @@ -64,12 +67,9 @@ teardown:
indices.delete:
index: my-index

# column order varies between test runs...
---
"Simple from":
- do:
allowed_warnings_regex:
- "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
esql.query:
body:
query: 'FROM my-index | SORT host.name, @timestamp | LIMIT 1'
Expand All @@ -88,17 +88,20 @@ teardown:
- match: {columns.5.type: "geo_point"}
- match: {columns.6.name: "message"}
- match: {columns.6.type: "text"}
- match: {columns.7.name: "process_id"}
- match: {columns.7.type: "integer"}
- match: {columns.7.name: "message.raw"}
- match: {columns.7.type: "keyword"}
- match: {columns.8.name: "process_id"}
- match: {columns.8.type: "integer"}

- match: {values.0.0: "2024-02-12T10:31:00.000Z"}
- match: {values.0.1: "yoda"}
- match: {values.0.2: "bar"}
- match: {values.0.3: "PUT"}
- match: {values.0.4: false}
- match: {values.0.5: "POINT (-74.006 40.7128)"}
- match: {values.0.6: null} # null is expected, because text fields aren't stored in ignored source
- match: {values.0.7: 102}
- match: {values.0.6: "Do. Or do not. There is no try."}
- match: {values.0.7: "Do. Or do not. There is no try."}
- match: {values.0.8: 102}

---
"Simple from geo point":
Expand Down Expand Up @@ -178,12 +181,117 @@ teardown:

---
"Simple from text fields":
- do:
esql.query:
body:
query: 'FROM my-index | SORT host.name, @timestamp | KEEP message | LIMIT 10'

- match: {columns.0.name: "message"}
- match: {columns.0.type: "text"}

- match: {values.0.0: "Do. Or do not. There is no try."}
- match: {values.1.0: "I find your lack of faith disturbing."}
- match: {values.2.0: "Wars not make one great."}
- match: {values.3.0: "No, I am your father."}
- match: {values.4.0: "May the force be with you."}
- match: {values.5.0: "That's no moon. It's a space station."}

---
"message field without keyword multi-field":
- do:
indices.create:
index: my-index2
body:
settings:
index:
mode: logsdb
mappings:
properties:
"@timestamp":
type: date
host.name:
type: keyword
agent_id:
type: keyword
doc_values: false
store: false
process_id:
type: integer
doc_values: false
store: false
http_method:
type: keyword
doc_values: false
store: false
is_https:
type: boolean
doc_values: false
store: false
location:
type: geo_point
doc_values: false
store: false
message:
type: text
store: false

- do:
bulk:
index: my-index2
refresh: true
body:
- { "index": { } }
- { "@timestamp": "2024-02-12T10:30:00Z", "host.name": "foo", "agent_id": "darth-vader", "process_id": 101, "http_method": "GET", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "No, I am your father." }
- { "index": { } }
- { "@timestamp": "2024-02-12T10:31:00Z", "host.name": "bar", "agent_id": "yoda", "process_id": 102, "http_method": "PUT", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "Do. Or do not. There is no try." }
- { "index": { } }
- { "@timestamp": "2024-02-12T10:32:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 103, "http_method": "GET", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "May the force be with you." }
- { "index": { } }
- { "@timestamp": "2024-02-12T10:33:00Z", "host.name": "baz", "agent_id": "darth-vader", "process_id": 102, "http_method": "POST", "is_https": true, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "I find your lack of faith disturbing." }
- { "index": { } }
- { "@timestamp": "2024-02-12T10:34:00Z", "host.name": "baz", "agent_id": "yoda", "process_id": 104, "http_method": "POST", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "Wars not make one great." }
- { "index": { } }
- { "@timestamp": "2024-02-12T10:35:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 105, "http_method": "GET", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "That's no moon. It's a space station." }

- do:
allowed_warnings_regex:
- "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
esql.query:
body:
query: 'FROM my-index | SORT host.name, @timestamp | KEEP message | LIMIT 10'
query: 'FROM my-index2 | SORT host.name, @timestamp | LIMIT 1'

- match: {columns.0.name: "@timestamp"}
- match: {columns.0.type: "date"}
- match: {columns.1.name: "agent_id"}
- match: {columns.1.type: "keyword"}
- match: {columns.2.name: "host.name"}
- match: {columns.2.type: "keyword"}
- match: {columns.3.name: "http_method" }
- match: {columns.3.type: "keyword" }
- match: {columns.4.name: "is_https"}
- match: {columns.4.type: "boolean"}
- match: {columns.5.name: "location"}
- match: {columns.5.type: "geo_point"}
- match: {columns.6.name: "message"}
- match: {columns.6.type: "text"}
- match: {columns.7.name: "process_id"}
- match: {columns.7.type: "integer"}

- match: {values.0.0: "2024-02-12T10:31:00.000Z"}
- match: {values.0.1: "yoda"}
- match: {values.0.2: "bar"}
- match: {values.0.3: "PUT"}
- match: {values.0.4: false}
- match: {values.0.5: "POINT (-74.006 40.7128)"}
- match: {values.0.6: null} # null is expected, because text fields aren't stored in ignored source
- match: {values.0.7: 102}

- do:
allowed_warnings_regex:
- "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
esql.query:
body:
query: 'FROM my-index2 | SORT host.name, @timestamp | KEEP message | LIMIT 10'

- match: {columns.0.name: "message"}
- match: {columns.0.type: "text"}
Expand Down

0 comments on commit c6e303e

Please sign in to comment.