Skip to content

Commit

Permalink
Add scripts to keyword field mapper (#71555)
Browse files Browse the repository at this point in the history
This commit adds script and on_script_error parameters to
keyword field mappers, allowing you to define index-time scripts
for keyword fields.
  • Loading branch information
romseygeek authored Apr 12, 2021
1 parent 3c198e2 commit 5e11709
Show file tree
Hide file tree
Showing 13 changed files with 422 additions and 26 deletions.
24 changes: 23 additions & 1 deletion docs/reference/mapping/types/keyword.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
The keyword family includes the following field types:

* <<keyword-field-type,`keyword`>>, which is used for structured content such as IDs, email
addresses, hostnames, status codes, zip codes, or tags.
addresses, hostnames, status codes, zip codes, or tags.
* <<constant-keyword-field-type,`constant_keyword`>> for keyword fields that always contain
the same value.
* <<wildcard-field-type,`wildcard`>> for unstructured machine-generated content.
Expand Down Expand Up @@ -100,6 +100,28 @@ The following parameters are accepted by `keyword` fields:

Accepts a string value which is substituted for any explicit `null`
values. Defaults to `null`, which means the field is treated as missing.
Note that this cannot be set if the `script` parameter is set.

`on_script_error`::

Defines what to do if the script defined by the `script` parameter
throws an error at indexing time. Accepts `reject` (default), which
will cause the entire document to be rejected, and `ignore`, which
will register the field in the document's
<<mapping-ignored-field,`_ignored`>> metadata field and continue
indexing. This parameter can only be set if the `script` parameter is
also set.

`script`::

If this parameter is set, then the field will index values generated
by this script, rather than reading the values directly from the
source. If a value is set for this field on the input document, then
the document will be rejected with an error.
Scripts are in the same format as their
<<runtime-mapping-fields,runtime equivalent>>. Values emitted by the
script are normalized as usual, and will be ignored if they are longer
than the value set on `ignore_above`.

<<mapping-store,`store`>>::

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
---
setup:
- do:
indices.create:
index: sensor
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
properties:
timestamp:
type: date
temperature:
type: long
voltage:
type: double
node:
type: keyword
store: true
day_of_week:
type: keyword
script: |
emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT));
# Test fetching from _source
day_of_week_from_source:
type: keyword
script: |
Instant instant = Instant.ofEpochMilli(params._source.timestamp);
ZonedDateTime dt = ZonedDateTime.ofInstant(instant, ZoneId.of("UTC"));
emit(dt.dayOfWeek.getDisplayName(TextStyle.FULL, Locale.ROOT));
# Test fetching many values
day_of_week_letters:
type: keyword
script: |
for (String dow: doc['day_of_week']) {
for (int i = 0; i < dow.length(); i++) {
emit(dow.charAt(i).toString());
}
}
prefixed_node:
type: keyword
script:
source: |
for (String node : params._fields.node.values) {
emit(params.prefix + node);
}
params:
prefix: node_

- do:
bulk:
index: sensor
refresh: true
body: |
{"index":{}}
{"timestamp": 1516729294000, "temperature": 200, "voltage": 5.2, "node": "a"}
{"index":{}}
{"timestamp": 1516642894000, "temperature": 201, "voltage": 5.8, "node": "b"}
{"index":{}}
{"timestamp": 1516556494000, "temperature": 202, "voltage": 5.1, "node": "a"}
{"index":{}}
{"timestamp": 1516470094000, "temperature": 198, "voltage": 5.6, "node": "b"}
{"index":{}}
{"timestamp": 1516383694000, "temperature": 200, "voltage": 4.2, "node": "c"}
{"index":{}}
{"timestamp": 1516297294000, "temperature": 202, "voltage": 4.0, "node": "c"}
---
"get mapping":
- do:
indices.get_mapping:
index: sensor
- match: {sensor.mappings.properties.day_of_week.type: keyword }
- match:
sensor.mappings.properties.day_of_week.script.source: |
emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT));
- match: {sensor.mappings.properties.day_of_week.script.lang: painless }

---
"fetch fields":
- do:
search:
index: sensor
body:
sort: timestamp
fields: [day_of_week, day_of_week_from_source, day_of_week_letters, prefixed_node]
- match: {hits.total.value: 6}
- match: {hits.hits.0.fields.day_of_week: [Thursday] }
- match: {hits.hits.0.fields.day_of_week_from_source: [Thursday] }
- match: {hits.hits.0.fields.day_of_week_letters: [T, h, u, r, s, d, a, y] }
- match: {hits.hits.0.fields.prefixed_node: [node_c] }

---
"docvalue_fields":
- do:
search:
index: sensor
body:
sort: timestamp
docvalue_fields: [day_of_week, day_of_week_from_source, day_of_week_letters, prefixed_node]
- match: {hits.total.value: 6}
- match: {hits.hits.0.fields.day_of_week: [Thursday] }
- match: {hits.hits.0.fields.day_of_week_from_source: [Thursday] }
- match: {hits.hits.0.fields.day_of_week_letters: [T, a, d, h, r, s, u, y] }
- match: {hits.hits.0.fields.prefixed_node: [node_c] }

---
"terms agg":
- do:
search:
index: sensor
body:
size: 0
aggs:
dow:
terms:
field: day_of_week
- match: {hits.total.value: 6}
- match: {aggregations.dow.buckets.0.key: Friday}
- match: {aggregations.dow.buckets.0.doc_count: 1}
- match: {aggregations.dow.buckets.1.key: Monday}
- match: {aggregations.dow.buckets.1.doc_count: 1}

---
"term query":
- do:
search:
index: sensor
body:
query:
term:
day_of_week: Monday
- match: {hits.total.value: 1}
- match: {hits.hits.0._source.voltage: 5.8}

---
"highlight term query":
- do:
search:
index: sensor
body:
query:
term:
day_of_week: Monday
highlight:
fields:
day_of_week: {}

- match: { hits.hits.0.highlight.day_of_week : [ "<em>Monday</em>" ] }

---
"match query":
- do:
search:
index: sensor
body:
query:
match:
day_of_week: Monday
- match: {hits.total.value: 1}
- match: {hits.hits.0._source.voltage: 5.8}

- do:
search:
index: sensor
body:
query:
match:
day_of_week:
query: Monday
analyzer: standard
- match: {hits.total.value: 0}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.elasticsearch.index.mapper.MapperRegistry;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.script.ScriptCompiler;
import org.elasticsearch.script.ScriptService;

import java.util.AbstractMap;
Expand All @@ -50,15 +51,15 @@ public class IndexMetadataVerifier {
private final NamedXContentRegistry xContentRegistry;
private final MapperRegistry mapperRegistry;
private final IndexScopedSettings indexScopedSettings;
private final ScriptService scriptService;
private final ScriptCompiler scriptService;

public IndexMetadataVerifier(Settings settings, NamedXContentRegistry xContentRegistry, MapperRegistry mapperRegistry,
IndexScopedSettings indexScopedSettings, ScriptService scriptService) {
IndexScopedSettings indexScopedSettings, ScriptCompiler scriptCompiler) {
this.settings = settings;
this.xContentRegistry = xContentRegistry;
this.mapperRegistry = mapperRegistry;
this.indexScopedSettings = indexScopedSettings;
this.scriptService = scriptService;
this.scriptService = scriptCompiler;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOE

@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
checkField(field);
List<BytesRef> values = document.getFields().stream()
.filter(f -> Objects.equals(f.name(), field))
.filter(f -> f.fieldType().docValuesType() == DocValuesType.SORTED_SET)
Expand Down
Loading

0 comments on commit 5e11709

Please sign in to comment.