From d1bb2d4865a5e9c4ab8f5e2ea7362bb881631aeb Mon Sep 17 00:00:00 2001 From: Tal Levy Date: Mon, 23 Oct 2017 14:57:22 -0700 Subject: [PATCH 1/3] Introduce templating support to timezone/locale in DateProcessor Sometimes systems like Beats would want to extract the date's timezone and/or locale from a value in a field of the document. This PR adds support for mustache templating to extract these values. Closes #24024. --- .../ingest/common/DateProcessor.java | 49 +++++++++---- .../ingest/common/IngestCommonPlugin.java | 2 +- .../common/DateProcessorFactoryTests.java | 68 ++++--------------- .../ingest/common/DateProcessorTests.java | 64 ++++++++++++++--- 4 files changed, 104 insertions(+), 79 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateProcessor.java index f1e7dcdcf55b0..4a9654f8cd0fe 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateProcessor.java @@ -20,11 +20,14 @@ package org.elasticsearch.ingest.common; import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.common.Nullable; import org.elasticsearch.common.util.LocaleUtils; import org.elasticsearch.ingest.AbstractProcessor; import org.elasticsearch.ingest.ConfigurationUtils; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; +import org.elasticsearch.script.ScriptService; +import org.elasticsearch.script.TemplateScript; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.format.ISODateTimeFormat; @@ -40,14 +43,15 @@ public final class DateProcessor extends AbstractProcessor { public static final String TYPE = "date"; static final String DEFAULT_TARGET_FIELD = "@timestamp"; - private final DateTimeZone timezone; - private final Locale locale; + private final TemplateScript.Factory timezone; + private final TemplateScript.Factory locale; private final String field; private final String targetField; private final List formats; - private final List> dateParsers; + private final List, Function>> dateParsers; - DateProcessor(String tag, DateTimeZone timezone, Locale locale, String field, List formats, String targetField) { + DateProcessor(String tag, @Nullable TemplateScript.Factory timezone, @Nullable TemplateScript.Factory locale, + String field, List formats, String targetField) { super(tag); this.timezone = timezone; this.locale = locale; @@ -57,10 +61,18 @@ public final class DateProcessor extends AbstractProcessor { this.dateParsers = new ArrayList<>(this.formats.size()); for (String format : formats) { DateFormat dateFormat = DateFormat.fromString(format); - dateParsers.add(dateFormat.getFunction(format, timezone, locale)); + dateParsers.add((params) -> dateFormat.getFunction(format, newDateTimeZone(params), newLocale(params))); } } + private DateTimeZone newDateTimeZone(Map params) { + return timezone == null ? DateTimeZone.UTC : DateTimeZone.forID(timezone.newInstance(params).execute()); + } + + private Locale newLocale(Map params) { + return (locale == null) ? Locale.ROOT : LocaleUtils.parse(locale.newInstance(params).execute()); + } + @Override public void execute(IngestDocument ingestDocument) { Object obj = ingestDocument.getFieldValue(field, Object.class); @@ -72,9 +84,9 @@ public void execute(IngestDocument ingestDocument) { DateTime dateTime = null; Exception lastException = null; - for (Function dateParser : dateParsers) { + for (Function, Function> dateParser : dateParsers) { try { - dateTime = dateParser.apply(value); + dateTime = dateParser.apply(ingestDocument.getSourceAndMetadata()).apply(value); } catch (Exception e) { //try the next parser and keep track of the exceptions lastException = ExceptionsHelper.useOrSuppress(lastException, e); @@ -93,11 +105,11 @@ public String getType() { return TYPE; } - DateTimeZone getTimezone() { + TemplateScript.Factory getTimezone() { return timezone; } - Locale getLocale() { + TemplateScript.Factory getLocale() { return locale; } @@ -115,19 +127,30 @@ List getFormats() { public static final class Factory implements Processor.Factory { + private final ScriptService scriptService; + + public Factory(ScriptService scriptService) { + this.scriptService = scriptService; + } + public DateProcessor create(Map registry, String processorTag, Map config) throws Exception { String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", DEFAULT_TARGET_FIELD); String timezoneString = ConfigurationUtils.readOptionalStringProperty(TYPE, processorTag, config, "timezone"); - DateTimeZone timezone = timezoneString == null ? DateTimeZone.UTC : DateTimeZone.forID(timezoneString); + TemplateScript.Factory compiledTimezoneTemplate = null; + if (timezoneString != null) { + compiledTimezoneTemplate = ConfigurationUtils.compileTemplate(TYPE, processorTag, + "timezone", timezoneString, scriptService); + } String localeString = ConfigurationUtils.readOptionalStringProperty(TYPE, processorTag, config, "locale"); - Locale locale = Locale.ROOT; + TemplateScript.Factory compiledLocaleTemplate = null; if (localeString != null) { - locale = LocaleUtils.parse(localeString); + compiledLocaleTemplate = ConfigurationUtils.compileTemplate(TYPE, processorTag, + "locale", localeString, scriptService); } List formats = ConfigurationUtils.readList(TYPE, processorTag, config, "formats"); - return new DateProcessor(processorTag, timezone, locale, field, formats, targetField); + return new DateProcessor(processorTag, compiledTimezoneTemplate, compiledLocaleTemplate, field, formats, targetField); } } } diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java index 245ddc32c071d..0182e290d72b4 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java @@ -70,7 +70,7 @@ public IngestCommonPlugin() throws IOException { @Override public Map getProcessors(Processor.Parameters parameters) { Map processors = new HashMap<>(); - processors.put(DateProcessor.TYPE, new DateProcessor.Factory()); + processors.put(DateProcessor.TYPE, new DateProcessor.Factory(parameters.scriptService)); processors.put(SetProcessor.TYPE, new SetProcessor.Factory(parameters.scriptService)); processors.put(AppendProcessor.TYPE, new AppendProcessor.Factory(parameters.scriptService)); processors.put(RenameProcessor.TYPE, new RenameProcessor.Factory()); diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorFactoryTests.java index f722f658bd1ff..2cf11f6d215d0 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorFactoryTests.java @@ -20,8 +20,10 @@ package org.elasticsearch.ingest.common; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.ingest.TestTemplateService; import org.elasticsearch.test.ESTestCase; import org.joda.time.DateTimeZone; +import org.junit.Before; import java.util.Arrays; import java.util.Collections; @@ -34,8 +36,14 @@ public class DateProcessorFactoryTests extends ESTestCase { + private DateProcessor.Factory factory; + + @Before + public void init() { + factory = new DateProcessor.Factory(TestTemplateService.instance()); + } + public void testBuildDefaults() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); config.put("field", sourceField); @@ -46,12 +54,11 @@ public void testBuildDefaults() throws Exception { assertThat(processor.getField(), equalTo(sourceField)); assertThat(processor.getTargetField(), equalTo(DateProcessor.DEFAULT_TARGET_FIELD)); assertThat(processor.getFormats(), equalTo(Collections.singletonList("dd/MM/yyyyy"))); - assertThat(processor.getLocale(), equalTo(Locale.ROOT)); - assertThat(processor.getTimezone(), equalTo(DateTimeZone.UTC)); + assertNull(processor.getLocale()); + assertNull(processor.getTimezone()); } public void testMatchFieldIsMandatory() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String targetField = randomAlphaOfLengthBetween(1, 10); config.put("target_field", targetField); @@ -66,7 +73,6 @@ public void testMatchFieldIsMandatory() throws Exception { } public void testMatchFormatsIsMandatory() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); String targetField = randomAlphaOfLengthBetween(1, 10); @@ -82,7 +88,6 @@ public void testMatchFormatsIsMandatory() throws Exception { } public void testParseLocale() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); config.put("field", sourceField); @@ -91,39 +96,10 @@ public void testParseLocale() throws Exception { config.put("locale", locale.toLanguageTag()); DateProcessor processor = factory.create(null, null, config); - assertThat(processor.getLocale().toLanguageTag(), equalTo(locale.toLanguageTag())); - } - - public void testParseInvalidLocale() throws Exception { - String[] locales = new String[] { "invalid_locale", "english", "xy", "xy-US" }; - for (String locale : locales) { - DateProcessor.Factory factory = new DateProcessor.Factory(); - Map config = new HashMap<>(); - String sourceField = randomAlphaOfLengthBetween(1, 10); - config.put("field", sourceField); - config.put("formats", Collections.singletonList("dd/MM/yyyyy")); - config.put("locale", locale); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, - () -> factory.create(null, null, config)); - assertThat(e.getMessage(), equalTo("Unknown language: " + locale.split("[_-]")[0])); - } - - locales = new String[] { "en-XY", "en-Canada" }; - for (String locale : locales) { - DateProcessor.Factory factory = new DateProcessor.Factory(); - Map config = new HashMap<>(); - String sourceField = randomAlphaOfLengthBetween(1, 10); - config.put("field", sourceField); - config.put("formats", Collections.singletonList("dd/MM/yyyyy")); - config.put("locale", locale); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, - () -> factory.create(null, null, config)); - assertThat(e.getMessage(), equalTo("Unknown country: " + locale.split("[_-]")[1])); - } + assertThat(processor.getLocale().newInstance(Collections.emptyMap()).execute(), equalTo(locale.toLanguageTag())); } public void testParseTimezone() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); config.put("field", sourceField); @@ -132,26 +108,10 @@ public void testParseTimezone() throws Exception { DateTimeZone timezone = randomDateTimeZone(); config.put("timezone", timezone.getID()); DateProcessor processor = factory.create(null, null, config); - assertThat(processor.getTimezone(), equalTo(timezone)); - } - - public void testParseInvalidTimezone() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); - Map config = new HashMap<>(); - String sourceField = randomAlphaOfLengthBetween(1, 10); - config.put("field", sourceField); - config.put("match_formats", Collections.singletonList("dd/MM/yyyyy")); - config.put("timezone", "invalid_timezone"); - try { - factory.create(null, null, config); - fail("invalid timezone should fail"); - } catch (IllegalArgumentException e) { - assertThat(e.getMessage(), equalTo("The datetime zone id 'invalid_timezone' is not recognised")); - } + assertThat(processor.getTimezone().newInstance(Collections.emptyMap()).execute(), equalTo(timezone.getID())); } public void testParseMatchFormats() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); config.put("field", sourceField); @@ -162,7 +122,6 @@ public void testParseMatchFormats() throws Exception { } public void testParseMatchFormatsFailure() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); config.put("field", sourceField); @@ -177,7 +136,6 @@ public void testParseMatchFormatsFailure() throws Exception { } public void testParseTargetField() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); String targetField = randomAlphaOfLengthBetween(1, 10); diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorTests.java index cc68340ec59f4..8fba759aa16f9 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorTests.java @@ -21,6 +21,8 @@ import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.RandomDocumentPicks; +import org.elasticsearch.ingest.TestTemplateService; +import org.elasticsearch.script.TemplateScript; import org.elasticsearch.test.ESTestCase; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; @@ -34,11 +36,19 @@ import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.equalTo; +import static org.joda.time.DateTimeZone.UTC; public class DateProcessorTests extends ESTestCase { + private TemplateScript.Factory templatize(Locale locale) { + return new TestTemplateService.MockTemplateScript.Factory(locale.getLanguage()); + } + private TemplateScript.Factory templatize(DateTimeZone timezone) { + return new TestTemplateService.MockTemplateScript.Factory(timezone.getID()); + } public void testJodaPattern() { - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.forID("Europe/Amsterdam"), Locale.ENGLISH, + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), + templatize(DateTimeZone.forID("Europe/Amsterdam")), templatize(Locale.ENGLISH), "date_as_string", Collections.singletonList("yyyy dd MM hh:mm:ss"), "date_as_date"); Map document = new HashMap<>(); document.put("date_as_string", "2010 12 06 11:05:15"); @@ -52,7 +62,8 @@ public void testJodaPatternMultipleFormats() { matchFormats.add("yyyy dd MM"); matchFormats.add("dd/MM/yyyy"); matchFormats.add("dd-MM-yyyy"); - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.forID("Europe/Amsterdam"), Locale.ENGLISH, + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), + templatize(DateTimeZone.forID("Europe/Amsterdam")), templatize(Locale.ENGLISH), "date_as_string", matchFormats, "date_as_date"); Map document = new HashMap<>(); @@ -86,16 +97,22 @@ public void testJodaPatternMultipleFormats() { public void testInvalidJodaPattern() { try { - new DateProcessor(randomAlphaOfLength(10), DateTimeZone.UTC, randomLocale(random()), + DateProcessor processor = new DateProcessor(randomAlphaOfLength(10), + templatize(UTC), templatize(randomLocale(random())), "date_as_string", Collections.singletonList("invalid pattern"), "date_as_date"); - fail("date processor initialization should have failed"); + Map document = new HashMap<>(); + document.put("date_as_string", "2010"); + processor.execute(RandomDocumentPicks.randomIngestDocument(random(), document)); + fail("date processor execution should have failed"); } catch(IllegalArgumentException e) { - assertThat(e.getMessage(), equalTo("Illegal pattern component: i")); + assertThat(e.getMessage(), equalTo("unable to parse date [2010]")); + assertThat(e.getCause().getMessage(), equalTo("Illegal pattern component: i")); } } public void testJodaPatternLocale() { - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.forID("Europe/Amsterdam"), Locale.ITALIAN, + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), + templatize(DateTimeZone.forID("Europe/Amsterdam")), templatize(Locale.ITALIAN), "date_as_string", Collections.singletonList("yyyy dd MMM"), "date_as_date"); Map document = new HashMap<>(); document.put("date_as_string", "2010 12 giugno"); @@ -105,7 +122,8 @@ public void testJodaPatternLocale() { } public void testJodaPatternDefaultYear() { - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.forID("Europe/Amsterdam"), Locale.ENGLISH, + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), + templatize(DateTimeZone.forID("Europe/Amsterdam")), templatize(Locale.ENGLISH), "date_as_string", Collections.singletonList("dd/MM"), "date_as_date"); Map document = new HashMap<>(); document.put("date_as_string", "12/06"); @@ -116,7 +134,8 @@ public void testJodaPatternDefaultYear() { } public void testTAI64N() { - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.forOffsetHours(2), randomLocale(random()), + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), templatize(DateTimeZone.forOffsetHours(2)), + templatize(randomLocale(random())), "date_as_string", Collections.singletonList("TAI64N"), "date_as_date"); Map document = new HashMap<>(); String dateAsString = (randomBoolean() ? "@" : "") + "4000000050d506482dbdf024"; @@ -127,7 +146,7 @@ public void testTAI64N() { } public void testUnixMs() { - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.UTC, randomLocale(random()), + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), templatize(UTC), templatize(randomLocale(random())), "date_as_string", Collections.singletonList("UNIX_MS"), "date_as_date"); Map document = new HashMap<>(); document.put("date_as_string", "1000500"); @@ -143,7 +162,8 @@ public void testUnixMs() { } public void testUnix() { - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.UTC, randomLocale(random()), + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), templatize(UTC), + templatize(randomLocale(random())), "date_as_string", Collections.singletonList("UNIX"), "date_as_date"); Map document = new HashMap<>(); document.put("date_as_string", "1000.5"); @@ -151,4 +171,28 @@ public void testUnix() { dateProcessor.execute(ingestDocument); assertThat(ingestDocument.getFieldValue("date_as_date", String.class), equalTo("1970-01-01T00:16:40.500Z")); } + + public void testInvalidTimezone() { + DateProcessor processor = new DateProcessor(randomAlphaOfLength(10), + new TestTemplateService.MockTemplateScript.Factory("invalid_timezone"), templatize(randomLocale(random())), + "date_as_string", Collections.singletonList("yyyy"), "date_as_date"); + Map document = new HashMap<>(); + document.put("date_as_string", "2010"); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> processor.execute(RandomDocumentPicks.randomIngestDocument(random(), document))); + assertThat(e.getMessage(), equalTo("unable to parse date [2010]")); + assertThat(e.getCause().getMessage(), equalTo("The datetime zone id 'invalid_timezone' is not recognised")); + } + + public void testInvalidLocale() { + DateProcessor processor = new DateProcessor(randomAlphaOfLength(10), + templatize(UTC), new TestTemplateService.MockTemplateScript.Factory("invalid_locale"), + "date_as_string", Collections.singletonList("yyyy"), "date_as_date"); + Map document = new HashMap<>(); + document.put("date_as_string", "2010"); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> processor.execute(RandomDocumentPicks.randomIngestDocument(random(), document))); + assertThat(e.getMessage(), equalTo("unable to parse date [2010]")); + assertThat(e.getCause().getMessage(), equalTo("Unknown language: invalid")); + } } From fd00e3b86f787de503cceeec3a9f65761f6a6808 Mon Sep 17 00:00:00 2001 From: Tal Levy Date: Wed, 8 Nov 2017 13:58:41 -0800 Subject: [PATCH 2/3] add docs --- docs/reference/ingest/ingest-node.asciidoc | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docs/reference/ingest/ingest-node.asciidoc b/docs/reference/ingest/ingest-node.asciidoc index 74cfabbff47a1..07dadac1ab90d 100644 --- a/docs/reference/ingest/ingest-node.asciidoc +++ b/docs/reference/ingest/ingest-node.asciidoc @@ -852,6 +852,30 @@ Here is an example that adds the parsed date to the `timestamp` field based on t -------------------------------------------------- // NOTCONSOLE +The `timezone` and `locale` fields are templated. This means that their values can be +extracted from fields within documents. The example below shows how to extract the locale/timezone +details from existing fields, `my_timezone` and `my_locale`, in the ingested document that contain +the timezone and locale values. + +[source,js] +-------------------------------------------------- +{ + "description" : "...", + "processors" : [ + { + "date" : { + "field" : "initial_date", + "target_field" : "timestamp", + "formats" : ["ISO8601"], + "timezone" : "{{ my_timezone }}", + "locale" : "{{ my_locale }}" + } + } + ] +} +-------------------------------------------------- +// NOTCONSOLE + [[date-index-name-processor]] === Date Index Name Processor From 36c200c6a877254a2a94b7a9157b6eefa33d3e20 Mon Sep 17 00:00:00 2001 From: Tal Levy Date: Wed, 8 Nov 2017 14:34:50 -0800 Subject: [PATCH 3/3] reword field -> processor parameters --- docs/reference/ingest/ingest-node.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ingest/ingest-node.asciidoc b/docs/reference/ingest/ingest-node.asciidoc index 07dadac1ab90d..720a180934324 100644 --- a/docs/reference/ingest/ingest-node.asciidoc +++ b/docs/reference/ingest/ingest-node.asciidoc @@ -852,7 +852,7 @@ Here is an example that adds the parsed date to the `timestamp` field based on t -------------------------------------------------- // NOTCONSOLE -The `timezone` and `locale` fields are templated. This means that their values can be +The `timezone` and `locale` processor parameters are templated. This means that their values can be extracted from fields within documents. The example below shows how to extract the locale/timezone details from existing fields, `my_timezone` and `my_locale`, in the ingested document that contain the timezone and locale values.