From 15a890b71f93206bc10fdd7e1662fd7f9a0edaae Mon Sep 17 00:00:00 2001 From: Tal Levy Date: Thu, 9 Nov 2017 09:45:32 -0800 Subject: [PATCH] Introduce templating support to timezone/locale in DateProcessor (#27089) Sometimes systems like Beats would want to extract the date's timezone and/or locale from a value in a field of the document. This PR adds support for mustache templating to extract these values. Closes #24024. --- docs/reference/ingest/ingest-node.asciidoc | 24 +++++++ .../ingest/common/DateProcessor.java | 49 +++++++++---- .../ingest/common/IngestCommonPlugin.java | 2 +- .../common/DateProcessorFactoryTests.java | 68 ++++--------------- .../ingest/common/DateProcessorTests.java | 64 ++++++++++++++--- 5 files changed, 128 insertions(+), 79 deletions(-) diff --git a/docs/reference/ingest/ingest-node.asciidoc b/docs/reference/ingest/ingest-node.asciidoc index 74cfabbff47a1..720a180934324 100644 --- a/docs/reference/ingest/ingest-node.asciidoc +++ b/docs/reference/ingest/ingest-node.asciidoc @@ -852,6 +852,30 @@ Here is an example that adds the parsed date to the `timestamp` field based on t -------------------------------------------------- // NOTCONSOLE +The `timezone` and `locale` processor parameters are templated. This means that their values can be +extracted from fields within documents. The example below shows how to extract the timezone and +locale from two existing fields of the ingested document, `my_timezone` and `my_locale`, which +contain the timezone and locale values. 
+ +[source,js] +-------------------------------------------------- +{ + "description" : "...", + "processors" : [ + { + "date" : { + "field" : "initial_date", + "target_field" : "timestamp", + "formats" : ["ISO8601"], + "timezone" : "{{ my_timezone }}", + "locale" : "{{ my_locale }}" + } + } + ] +} +-------------------------------------------------- +// NOTCONSOLE + [[date-index-name-processor]] === Date Index Name Processor diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateProcessor.java index f1e7dcdcf55b0..4a9654f8cd0fe 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateProcessor.java @@ -20,11 +20,14 @@ package org.elasticsearch.ingest.common; import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.common.Nullable; import org.elasticsearch.common.util.LocaleUtils; import org.elasticsearch.ingest.AbstractProcessor; import org.elasticsearch.ingest.ConfigurationUtils; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; +import org.elasticsearch.script.ScriptService; +import org.elasticsearch.script.TemplateScript; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.format.ISODateTimeFormat; @@ -40,14 +43,15 @@ public final class DateProcessor extends AbstractProcessor { public static final String TYPE = "date"; static final String DEFAULT_TARGET_FIELD = "@timestamp"; - private final DateTimeZone timezone; - private final Locale locale; + private final TemplateScript.Factory timezone; + private final TemplateScript.Factory locale; private final String field; private final String targetField; private final List formats; - private final List> dateParsers; + private final List, Function>> dateParsers; - DateProcessor(String tag, 
DateTimeZone timezone, Locale locale, String field, List formats, String targetField) { + DateProcessor(String tag, @Nullable TemplateScript.Factory timezone, @Nullable TemplateScript.Factory locale, + String field, List formats, String targetField) { super(tag); this.timezone = timezone; this.locale = locale; @@ -57,10 +61,18 @@ public final class DateProcessor extends AbstractProcessor { this.dateParsers = new ArrayList<>(this.formats.size()); for (String format : formats) { DateFormat dateFormat = DateFormat.fromString(format); - dateParsers.add(dateFormat.getFunction(format, timezone, locale)); + dateParsers.add((params) -> dateFormat.getFunction(format, newDateTimeZone(params), newLocale(params))); } } + private DateTimeZone newDateTimeZone(Map params) { + return timezone == null ? DateTimeZone.UTC : DateTimeZone.forID(timezone.newInstance(params).execute()); + } + + private Locale newLocale(Map params) { + return (locale == null) ? Locale.ROOT : LocaleUtils.parse(locale.newInstance(params).execute()); + } + @Override public void execute(IngestDocument ingestDocument) { Object obj = ingestDocument.getFieldValue(field, Object.class); @@ -72,9 +84,9 @@ public void execute(IngestDocument ingestDocument) { DateTime dateTime = null; Exception lastException = null; - for (Function dateParser : dateParsers) { + for (Function, Function> dateParser : dateParsers) { try { - dateTime = dateParser.apply(value); + dateTime = dateParser.apply(ingestDocument.getSourceAndMetadata()).apply(value); } catch (Exception e) { //try the next parser and keep track of the exceptions lastException = ExceptionsHelper.useOrSuppress(lastException, e); @@ -93,11 +105,11 @@ public String getType() { return TYPE; } - DateTimeZone getTimezone() { + TemplateScript.Factory getTimezone() { return timezone; } - Locale getLocale() { + TemplateScript.Factory getLocale() { return locale; } @@ -115,19 +127,30 @@ List getFormats() { public static final class Factory implements Processor.Factory { + 
private final ScriptService scriptService; + + public Factory(ScriptService scriptService) { + this.scriptService = scriptService; + } + public DateProcessor create(Map registry, String processorTag, Map config) throws Exception { String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", DEFAULT_TARGET_FIELD); String timezoneString = ConfigurationUtils.readOptionalStringProperty(TYPE, processorTag, config, "timezone"); - DateTimeZone timezone = timezoneString == null ? DateTimeZone.UTC : DateTimeZone.forID(timezoneString); + TemplateScript.Factory compiledTimezoneTemplate = null; + if (timezoneString != null) { + compiledTimezoneTemplate = ConfigurationUtils.compileTemplate(TYPE, processorTag, + "timezone", timezoneString, scriptService); + } String localeString = ConfigurationUtils.readOptionalStringProperty(TYPE, processorTag, config, "locale"); - Locale locale = Locale.ROOT; + TemplateScript.Factory compiledLocaleTemplate = null; if (localeString != null) { - locale = LocaleUtils.parse(localeString); + compiledLocaleTemplate = ConfigurationUtils.compileTemplate(TYPE, processorTag, + "locale", localeString, scriptService); } List formats = ConfigurationUtils.readList(TYPE, processorTag, config, "formats"); - return new DateProcessor(processorTag, timezone, locale, field, formats, targetField); + return new DateProcessor(processorTag, compiledTimezoneTemplate, compiledLocaleTemplate, field, formats, targetField); } } } diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java index 245ddc32c071d..0182e290d72b4 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java +++ 
b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java @@ -70,7 +70,7 @@ public IngestCommonPlugin() throws IOException { @Override public Map getProcessors(Processor.Parameters parameters) { Map processors = new HashMap<>(); - processors.put(DateProcessor.TYPE, new DateProcessor.Factory()); + processors.put(DateProcessor.TYPE, new DateProcessor.Factory(parameters.scriptService)); processors.put(SetProcessor.TYPE, new SetProcessor.Factory(parameters.scriptService)); processors.put(AppendProcessor.TYPE, new AppendProcessor.Factory(parameters.scriptService)); processors.put(RenameProcessor.TYPE, new RenameProcessor.Factory()); diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorFactoryTests.java index f722f658bd1ff..2cf11f6d215d0 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorFactoryTests.java @@ -20,8 +20,10 @@ package org.elasticsearch.ingest.common; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.ingest.TestTemplateService; import org.elasticsearch.test.ESTestCase; import org.joda.time.DateTimeZone; +import org.junit.Before; import java.util.Arrays; import java.util.Collections; @@ -34,8 +36,14 @@ public class DateProcessorFactoryTests extends ESTestCase { + private DateProcessor.Factory factory; + + @Before + public void init() { + factory = new DateProcessor.Factory(TestTemplateService.instance()); + } + public void testBuildDefaults() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); config.put("field", sourceField); @@ -46,12 +54,11 @@ public void testBuildDefaults() throws Exception { 
assertThat(processor.getField(), equalTo(sourceField)); assertThat(processor.getTargetField(), equalTo(DateProcessor.DEFAULT_TARGET_FIELD)); assertThat(processor.getFormats(), equalTo(Collections.singletonList("dd/MM/yyyyy"))); - assertThat(processor.getLocale(), equalTo(Locale.ROOT)); - assertThat(processor.getTimezone(), equalTo(DateTimeZone.UTC)); + assertNull(processor.getLocale()); + assertNull(processor.getTimezone()); } public void testMatchFieldIsMandatory() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String targetField = randomAlphaOfLengthBetween(1, 10); config.put("target_field", targetField); @@ -66,7 +73,6 @@ public void testMatchFieldIsMandatory() throws Exception { } public void testMatchFormatsIsMandatory() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); String targetField = randomAlphaOfLengthBetween(1, 10); @@ -82,7 +88,6 @@ public void testMatchFormatsIsMandatory() throws Exception { } public void testParseLocale() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); config.put("field", sourceField); @@ -91,39 +96,10 @@ public void testParseLocale() throws Exception { config.put("locale", locale.toLanguageTag()); DateProcessor processor = factory.create(null, null, config); - assertThat(processor.getLocale().toLanguageTag(), equalTo(locale.toLanguageTag())); - } - - public void testParseInvalidLocale() throws Exception { - String[] locales = new String[] { "invalid_locale", "english", "xy", "xy-US" }; - for (String locale : locales) { - DateProcessor.Factory factory = new DateProcessor.Factory(); - Map config = new HashMap<>(); - String sourceField = randomAlphaOfLengthBetween(1, 10); - config.put("field", sourceField); - config.put("formats", 
Collections.singletonList("dd/MM/yyyyy")); - config.put("locale", locale); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, - () -> factory.create(null, null, config)); - assertThat(e.getMessage(), equalTo("Unknown language: " + locale.split("[_-]")[0])); - } - - locales = new String[] { "en-XY", "en-Canada" }; - for (String locale : locales) { - DateProcessor.Factory factory = new DateProcessor.Factory(); - Map config = new HashMap<>(); - String sourceField = randomAlphaOfLengthBetween(1, 10); - config.put("field", sourceField); - config.put("formats", Collections.singletonList("dd/MM/yyyyy")); - config.put("locale", locale); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, - () -> factory.create(null, null, config)); - assertThat(e.getMessage(), equalTo("Unknown country: " + locale.split("[_-]")[1])); - } + assertThat(processor.getLocale().newInstance(Collections.emptyMap()).execute(), equalTo(locale.toLanguageTag())); } public void testParseTimezone() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); config.put("field", sourceField); @@ -132,26 +108,10 @@ public void testParseTimezone() throws Exception { DateTimeZone timezone = randomDateTimeZone(); config.put("timezone", timezone.getID()); DateProcessor processor = factory.create(null, null, config); - assertThat(processor.getTimezone(), equalTo(timezone)); - } - - public void testParseInvalidTimezone() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); - Map config = new HashMap<>(); - String sourceField = randomAlphaOfLengthBetween(1, 10); - config.put("field", sourceField); - config.put("match_formats", Collections.singletonList("dd/MM/yyyyy")); - config.put("timezone", "invalid_timezone"); - try { - factory.create(null, null, config); - fail("invalid timezone should fail"); - } catch 
(IllegalArgumentException e) { - assertThat(e.getMessage(), equalTo("The datetime zone id 'invalid_timezone' is not recognised")); - } + assertThat(processor.getTimezone().newInstance(Collections.emptyMap()).execute(), equalTo(timezone.getID())); } public void testParseMatchFormats() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); config.put("field", sourceField); @@ -162,7 +122,6 @@ public void testParseMatchFormats() throws Exception { } public void testParseMatchFormatsFailure() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); config.put("field", sourceField); @@ -177,7 +136,6 @@ public void testParseMatchFormatsFailure() throws Exception { } public void testParseTargetField() throws Exception { - DateProcessor.Factory factory = new DateProcessor.Factory(); Map config = new HashMap<>(); String sourceField = randomAlphaOfLengthBetween(1, 10); String targetField = randomAlphaOfLengthBetween(1, 10); diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorTests.java index cc68340ec59f4..8fba759aa16f9 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateProcessorTests.java @@ -21,6 +21,8 @@ import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.RandomDocumentPicks; +import org.elasticsearch.ingest.TestTemplateService; +import org.elasticsearch.script.TemplateScript; import org.elasticsearch.test.ESTestCase; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; @@ -34,11 +36,19 @@ import static org.hamcrest.CoreMatchers.containsString; 
import static org.hamcrest.CoreMatchers.equalTo; +import static org.joda.time.DateTimeZone.UTC; public class DateProcessorTests extends ESTestCase { + private TemplateScript.Factory templatize(Locale locale) { + return new TestTemplateService.MockTemplateScript.Factory(locale.getLanguage()); + } + private TemplateScript.Factory templatize(DateTimeZone timezone) { + return new TestTemplateService.MockTemplateScript.Factory(timezone.getID()); + } public void testJodaPattern() { - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.forID("Europe/Amsterdam"), Locale.ENGLISH, + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), + templatize(DateTimeZone.forID("Europe/Amsterdam")), templatize(Locale.ENGLISH), "date_as_string", Collections.singletonList("yyyy dd MM hh:mm:ss"), "date_as_date"); Map document = new HashMap<>(); document.put("date_as_string", "2010 12 06 11:05:15"); @@ -52,7 +62,8 @@ public void testJodaPatternMultipleFormats() { matchFormats.add("yyyy dd MM"); matchFormats.add("dd/MM/yyyy"); matchFormats.add("dd-MM-yyyy"); - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.forID("Europe/Amsterdam"), Locale.ENGLISH, + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), + templatize(DateTimeZone.forID("Europe/Amsterdam")), templatize(Locale.ENGLISH), "date_as_string", matchFormats, "date_as_date"); Map document = new HashMap<>(); @@ -86,16 +97,22 @@ public void testJodaPatternMultipleFormats() { public void testInvalidJodaPattern() { try { - new DateProcessor(randomAlphaOfLength(10), DateTimeZone.UTC, randomLocale(random()), + DateProcessor processor = new DateProcessor(randomAlphaOfLength(10), + templatize(UTC), templatize(randomLocale(random())), "date_as_string", Collections.singletonList("invalid pattern"), "date_as_date"); - fail("date processor initialization should have failed"); + Map document = new HashMap<>(); + 
document.put("date_as_string", "2010"); + processor.execute(RandomDocumentPicks.randomIngestDocument(random(), document)); + fail("date processor execution should have failed"); } catch(IllegalArgumentException e) { - assertThat(e.getMessage(), equalTo("Illegal pattern component: i")); + assertThat(e.getMessage(), equalTo("unable to parse date [2010]")); + assertThat(e.getCause().getMessage(), equalTo("Illegal pattern component: i")); } } public void testJodaPatternLocale() { - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.forID("Europe/Amsterdam"), Locale.ITALIAN, + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), + templatize(DateTimeZone.forID("Europe/Amsterdam")), templatize(Locale.ITALIAN), "date_as_string", Collections.singletonList("yyyy dd MMM"), "date_as_date"); Map document = new HashMap<>(); document.put("date_as_string", "2010 12 giugno"); @@ -105,7 +122,8 @@ public void testJodaPatternLocale() { } public void testJodaPatternDefaultYear() { - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.forID("Europe/Amsterdam"), Locale.ENGLISH, + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), + templatize(DateTimeZone.forID("Europe/Amsterdam")), templatize(Locale.ENGLISH), "date_as_string", Collections.singletonList("dd/MM"), "date_as_date"); Map document = new HashMap<>(); document.put("date_as_string", "12/06"); @@ -116,7 +134,8 @@ public void testJodaPatternDefaultYear() { } public void testTAI64N() { - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.forOffsetHours(2), randomLocale(random()), + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), templatize(DateTimeZone.forOffsetHours(2)), + templatize(randomLocale(random())), "date_as_string", Collections.singletonList("TAI64N"), "date_as_date"); Map document = new HashMap<>(); String dateAsString = (randomBoolean() ? 
"@" : "") + "4000000050d506482dbdf024"; @@ -127,7 +146,7 @@ public void testTAI64N() { } public void testUnixMs() { - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.UTC, randomLocale(random()), + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), templatize(UTC), templatize(randomLocale(random())), "date_as_string", Collections.singletonList("UNIX_MS"), "date_as_date"); Map document = new HashMap<>(); document.put("date_as_string", "1000500"); @@ -143,7 +162,8 @@ public void testUnixMs() { } public void testUnix() { - DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), DateTimeZone.UTC, randomLocale(random()), + DateProcessor dateProcessor = new DateProcessor(randomAlphaOfLength(10), templatize(UTC), + templatize(randomLocale(random())), "date_as_string", Collections.singletonList("UNIX"), "date_as_date"); Map document = new HashMap<>(); document.put("date_as_string", "1000.5"); @@ -151,4 +171,28 @@ public void testUnix() { dateProcessor.execute(ingestDocument); assertThat(ingestDocument.getFieldValue("date_as_date", String.class), equalTo("1970-01-01T00:16:40.500Z")); } + + public void testInvalidTimezone() { + DateProcessor processor = new DateProcessor(randomAlphaOfLength(10), + new TestTemplateService.MockTemplateScript.Factory("invalid_timezone"), templatize(randomLocale(random())), + "date_as_string", Collections.singletonList("yyyy"), "date_as_date"); + Map document = new HashMap<>(); + document.put("date_as_string", "2010"); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> processor.execute(RandomDocumentPicks.randomIngestDocument(random(), document))); + assertThat(e.getMessage(), equalTo("unable to parse date [2010]")); + assertThat(e.getCause().getMessage(), equalTo("The datetime zone id 'invalid_timezone' is not recognised")); + } + + public void testInvalidLocale() { + DateProcessor processor = new DateProcessor(randomAlphaOfLength(10), 
+ templatize(UTC), new TestTemplateService.MockTemplateScript.Factory("invalid_locale"), + "date_as_string", Collections.singletonList("yyyy"), "date_as_date"); + Map document = new HashMap<>(); + document.put("date_as_string", "2010"); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> processor.execute(RandomDocumentPicks.randomIngestDocument(random(), document))); + assertThat(e.getMessage(), equalTo("unable to parse date [2010]")); + assertThat(e.getCause().getMessage(), equalTo("Unknown language: invalid")); + } }