diff --git a/buildSrc/src/main/resources/checkstyle_suppressions.xml b/buildSrc/src/main/resources/checkstyle_suppressions.xml
index 07dd29a33ad68..85b61e2c9867c 100644
--- a/buildSrc/src/main/resources/checkstyle_suppressions.xml
+++ b/buildSrc/src/main/resources/checkstyle_suppressions.xml
@@ -441,7 +441,6 @@
-
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/Analysis.java b/core/src/main/java/org/elasticsearch/index/analysis/Analysis.java
index b7481e78496e5..1054721535eac 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/Analysis.java
+++ b/core/src/main/java/org/elasticsearch/index/analysis/Analysis.java
@@ -67,8 +67,10 @@
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
+import java.nio.charset.CharacterCodingException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -163,7 +165,8 @@ public static CharArraySet parseStemExclusion(Settings settings, CharArraySet de
NAMED_STOP_WORDS = unmodifiableMap(namedStopWords);
}
- public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords, Map> namedWords, boolean ignoreCase) {
+ public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords,
+ Map> namedWords, boolean ignoreCase) {
String value = settings.get(name);
if (value != null) {
if ("_none_".equals(value)) {
@@ -237,12 +240,17 @@ public static List getWordList(Environment env, Settings settings, Strin
}
}
- final Path wordListFile = env.configFile().resolve(wordListPath);
+ final Path path = env.configFile().resolve(wordListPath);
- try (BufferedReader reader = FileSystemUtils.newBufferedReader(wordListFile.toUri().toURL(), StandardCharsets.UTF_8)) {
+ try (BufferedReader reader = FileSystemUtils.newBufferedReader(path.toUri().toURL(), StandardCharsets.UTF_8)) {
return loadWordList(reader, "#");
+ } catch (CharacterCodingException ex) {
+ String message = String.format(Locale.ROOT,
+ "Unsupported character encoding detected while reading %s_path: %s - files must be UTF-8 encoded",
+ settingPrefix, path.toString());
+ throw new IllegalArgumentException(message, ex);
} catch (IOException ioe) {
- String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix);
+ String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, path.toString());
throw new IllegalArgumentException(message, ioe);
}
}
@@ -256,7 +264,7 @@ public static List loadWordList(Reader reader, String comment) throws IO
} else {
br = new BufferedReader(reader);
}
- String word = null;
+ String word;
while ((word = br.readLine()) != null) {
if (!Strings.hasText(word)) {
continue;
@@ -283,13 +291,16 @@ public static Reader getReaderFromFile(Environment env, Settings settings, Strin
if (filePath == null) {
return null;
}
-
final Path path = env.configFile().resolve(filePath);
-
try {
return FileSystemUtils.newBufferedReader(path.toUri().toURL(), StandardCharsets.UTF_8);
+ } catch (CharacterCodingException ex) {
+ String message = String.format(Locale.ROOT,
+ "Unsupported character encoding detected while reading %s_path: %s files must be UTF-8 encoded",
+ settingPrefix, path.toString());
+ throw new IllegalArgumentException(message, ex);
} catch (IOException ioe) {
- String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix);
+ String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, path.toString());
throw new IllegalArgumentException(message, ioe);
}
}
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java
index 061e0d9d29fc7..37943773cef2b 100644
--- a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java
+++ b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java
@@ -21,8 +21,23 @@
import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
import org.elasticsearch.test.ESTestCase;
+import java.io.BufferedWriter;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.MalformedInputException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.List;
+
import static org.elasticsearch.common.settings.Settings.settingsBuilder;
import static org.hamcrest.Matchers.is;
@@ -42,4 +57,55 @@ public void testParseStemExclusion() {
assertThat(set.contains("bar"), is(true));
assertThat(set.contains("baz"), is(false));
}
+
+ public void testParseNonExistingFile() {
+ Path tempDir = createTempDir();
+ Settings nodeSettings = Settings.builder()
+ .put("foo.bar_path", tempDir.resolve("foo.dict"))
+ .put(Environment.PATH_HOME_SETTING.getKey(), tempDir).build();
+ Environment env = new Environment(nodeSettings);
+ IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
+ () -> Analysis.getWordList(env, nodeSettings, "foo.bar"));
+ assertEquals("IOException while reading foo.bar_path: " + tempDir.resolve("foo.dict").toString(), ex.getMessage());
+ assertTrue(ex.getCause().toString(), ex.getCause() instanceof FileNotFoundException
+ || ex.getCause() instanceof NoSuchFileException);
+ }
+
+
+ public void testParseFalseEncodedFile() throws IOException {
+ Path tempDir = createTempDir();
+ Path dict = tempDir.resolve("foo.dict");
+ Settings nodeSettings = Settings.builder()
+ .put("foo.bar_path", dict)
+ .put(Environment.PATH_HOME_SETTING.getKey(), tempDir).build();
+ try (OutputStream writer = Files.newOutputStream(dict)) {
+ writer.write(new byte[]{(byte) 0xff, 0x00, 0x00}); // some invalid UTF-8
+ writer.write('\n');
+ }
+ Environment env = new Environment(nodeSettings);
+ IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
+ () -> Analysis.getWordList(env, nodeSettings, "foo.bar"));
+ assertEquals("Unsupported character encoding detected while reading foo.bar_path: " + tempDir.resolve("foo.dict").toString()
+ + " - files must be UTF-8 encoded" , ex.getMessage());
+ assertTrue(ex.getCause().toString(), ex.getCause() instanceof MalformedInputException
+ || ex.getCause() instanceof CharacterCodingException);
+ }
+
+ public void testParseWordList() throws IOException {
+ Path tempDir = createTempDir();
+ Path dict = tempDir.resolve("foo.dict");
+ Settings nodeSettings = Settings.builder()
+ .put("foo.bar_path", dict)
+ .put(Environment.PATH_HOME_SETTING.getKey(), tempDir).build();
+ try (BufferedWriter writer = Files.newBufferedWriter(dict, StandardCharsets.UTF_8)) {
+ writer.write("hello");
+ writer.write('\n');
+ writer.write("world");
+ writer.write('\n');
+ }
+ Environment env = new Environment(nodeSettings);
+ List wordList = Analysis.getWordList(env, nodeSettings, "foo.bar");
+ assertEquals(Arrays.asList("hello", "world"), wordList);
+
+ }
}