Skip to content

Commit

Permalink
Make locale parsing less lenient. (#26361)
Browse files Browse the repository at this point in the history
The `locale` field of `date` fields accepts almost any string and unknown
locales are simply ignored, which is trappy. We should fail on unknown languages
or countries.

This commit also makes `-` an accepted separator in addition to `_` since `-`
is the recommended separator (https://tools.ietf.org/html/rfc5646#section-2.1).
`_` is probably still worth supporting since it is the separator used by
`Locale#toString()`.
  • Loading branch information
jpountz committed Aug 28, 2017
1 parent 3c5b582 commit d499467
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 23 deletions.
89 changes: 68 additions & 21 deletions core/src/main/java/org/elasticsearch/common/util/LocaleUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,41 +20,88 @@
package org.elasticsearch.common.util;


import java.util.Arrays;
import java.util.Locale;
import java.util.MissingResourceException;

/**
* Utilities for dealing with {@link Locale} objects
*/
public class LocaleUtils {

/**
* Parse the string describing a locale into a {@link Locale} object
* Parse the given locale as {@code language}, {@code language-country} or
* {@code language-country-variant}.
* Either underscores or hyphens may be used as separators, but consistently, i.e.
* you may not use a hyphen to separate the language from the country and an
* underscore to separate the country from the variant.
* @throws IllegalArgumentException if there are too many parts in the locale string
* @throws IllegalArgumentException if the language or country is not recognized
*/
public static Locale parse(String localeStr) {
final String[] parts = localeStr.split("_", -1);
switch (parts.length) {
case 3:
// lang_country_variant
return new Locale(parts[0], parts[1], parts[2]);
case 2:
// lang_country
return new Locale(parts[0], parts[1]);
case 1:
if ("ROOT".equalsIgnoreCase(parts[0])) {
return Locale.ROOT;
}
// lang
return new Locale(parts[0]);
default:
throw new IllegalArgumentException("Can't parse locale: [" + localeStr + "]");
boolean useUnderscoreAsSeparator = false;
for (int i = 0; i < localeStr.length(); ++i) {
final char c = localeStr.charAt(i);
if (c == '-') {
// the locale uses - as a separator, as expected
break;
} else if (c == '_') {
useUnderscoreAsSeparator = true;
break;
}
}

final String[] parts;
if (useUnderscoreAsSeparator) {
parts = localeStr.split("_", -1);
} else {
parts = localeStr.split("-", -1);
}

final Locale locale = parseParts(parts);

try {
locale.getISO3Language();
} catch (MissingResourceException e) {
throw new IllegalArgumentException("Unknown language: " + parts[0], e);
}

try {
locale.getISO3Country();
} catch (MissingResourceException e) {
throw new IllegalArgumentException("Unknown country: " + parts[1], e);
}

return locale;
}

/**
* Return a string for a {@link Locale} object
* Parse the string describing a locale into a {@link Locale} object
* for 5.x indices.
*/
public static String toString(Locale locale) {
// JAVA7 - use .toLanguageTag instead of .toString()
return locale.toString();
@Deprecated
public static Locale parse5x(String localeStr) {
final String[] parts = localeStr.split("_", -1);
return parseParts(parts);
}

/**
 * Build a {@link Locale} from the already-split components of a locale string.
 * One component is treated as a language (or the root locale when it equals
 * {@code "ROOT"} ignoring case), two as language and country, and three as
 * language, country and variant.
 * @param parts the components of the locale string, in order
 * @return the locale described by {@code parts}
 * @throws IllegalArgumentException if {@code parts} does not hold 1, 2 or 3 components
 */
private static Locale parseParts(String[] parts) {
    final int count = parts.length;
    if (count == 1) {
        // lang, with "root" in any casing designating the root locale
        final String language = parts[0];
        return "ROOT".equalsIgnoreCase(language) ? Locale.ROOT : new Locale(language);
    }
    if (count == 2) {
        // lang, country
        return new Locale(parts[0], parts[1]);
    }
    if (count == 3) {
        // lang, country, variant
        return new Locale(parts[0], parts[1], parts[2]);
    }
    throw new IllegalArgumentException("Locales can have at most 3 parts but got " + count + ": " + Arrays.asList(parts));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.joda.DateMathParser;
Expand Down Expand Up @@ -154,7 +155,13 @@ public Mapper.Builder<?,?> parse(String name, Map<String, Object> node, ParserCo
builder.ignoreMalformed(TypeParsers.nodeBooleanValue(name, "ignore_malformed", propNode, parserContext));
iterator.remove();
} else if (propName.equals("locale")) {
builder.locale(LocaleUtils.parse(propNode.toString()));
Locale locale;
if (parserContext.indexVersionCreated().onOrAfter(Version.V_6_0_0_beta2)) {
locale = LocaleUtils.parse(propNode.toString());
} else {
locale = LocaleUtils.parse5x(propNode.toString());
}
builder.locale(locale);
iterator.remove();
} else if (propName.equals("format")) {
builder.dateTimeFormatter(parseDateTimeFormatter(propNode));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,13 @@ public Mapper.Builder<?,?> parse(String name, Map<String, Object> node,
builder.coerce(TypeParsers.nodeBooleanValue(name, "coerce", propNode, parserContext));
iterator.remove();
} else if (propName.equals("locale")) {
builder.locale(LocaleUtils.parse(propNode.toString()));
Locale locale;
if (parserContext.indexVersionCreated().onOrAfter(Version.V_6_0_0_beta2)) {
locale = LocaleUtils.parse(propNode.toString());
} else {
locale = LocaleUtils.parse5x(propNode.toString());
}
builder.locale(locale);
iterator.remove();
} else if (propName.equals("format")) {
builder.dateTimeFormatter(parseDateTimeFormatter(propNode));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.common.util;

import org.elasticsearch.test.ESTestCase;
import org.hamcrest.Matchers;

import java.util.Locale;

/**
 * Tests for {@link LocaleUtils#parse(String)}: rejection of unknown languages,
 * unknown countries and over-long locale strings, plus acceptance of both
 * {@code _} and {@code -} as separators.
 */
public class LocaleUtilsTests extends ESTestCase {

    /**
     * Assert that parsing {@code localeStr} fails with an
     * {@link IllegalArgumentException} whose message contains {@code expectedMessage}.
     */
    private static void assertParseFailure(String localeStr, String expectedMessage) {
        final IllegalArgumentException failure = expectThrows(IllegalArgumentException.class,
                () -> LocaleUtils.parse(localeStr));
        assertThat(failure.getMessage(), Matchers.containsString(expectedMessage));
    }

    /** An unrecognized language is reported, with or without a country part. */
    public void testIllegalLang() {
        assertParseFailure("yz", "Unknown language: yz");
        assertParseFailure("yz-CA", "Unknown language: yz");
    }

    /** An unrecognized country is reported, with or without a variant part. */
    public void testIllegalCountry() {
        assertParseFailure("en-YZ", "Unknown country: YZ");
        assertParseFailure("en-YZ-foobar", "Unknown country: YZ");
    }

    /** A locale string with four parts is rejected. */
    public void testIllegalNumberOfParts() {
        assertParseFailure("en-US-foo-bar", "Locales can have at most 3 parts but got 4");
    }

    /** The underscore and hyphen forms of the same locale parse to equal objects. */
    public void testUnderscores() {
        final Locale withUnderscore = LocaleUtils.parse("fr_FR");
        final Locale withHyphen = LocaleUtils.parse("fr-FR");
        assertEquals(withHyphen, withUnderscore);
    }

    /** Language-only, language-country, "root" and the empty string all parse. */
    public void testSimple() {
        assertEquals(Locale.FRENCH, LocaleUtils.parse("fr"));
        assertEquals(Locale.FRANCE, LocaleUtils.parse("fr-FR"));
        assertEquals(Locale.ROOT, LocaleUtils.parse("root"));
        assertEquals(Locale.ROOT, LocaleUtils.parse(""));
    }
}
5 changes: 5 additions & 0 deletions docs/reference/migration/migrate_6_0/mappings.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,8 @@ Previously Elasticsearch would silently ignore any dynamic templates that
included a `match_mapping_type` type that was unrecognized. An exception is now
thrown on an unrecognized type.

==== Validation of `locale` on date fields

The `locale` option of `date` fields previously allowed almost any string value,
and unrecognized values would simply be ignored. Unrecognized values are now
rejected on indices created with Elasticsearch 6.0 or newer.

0 comments on commit d499467

Please sign in to comment.