Skip to content

Commit

Permalink
Improve error message if resource files have illegal encoding
Browse files Browse the repository at this point in the history
This commit fixes string formatting issues in the error handling and
provides a bettter error message if malformed input is detected.
This commit also adds tests for both situations.

Relates to #17212
  • Loading branch information
s1monw committed Mar 22, 2016
1 parent ff021c6 commit 75d5b83
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 9 deletions.
1 change: 0 additions & 1 deletion buildSrc/src/main/resources/checkstyle_suppressions.xml
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,6 @@
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergeSchedulerConfig.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]NodeServicesProvider.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLog.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]Analysis.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]AnalysisRegistry.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]AnalysisService.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]CommonGramsTokenFilterFactory.java" checks="LineLength" />
Expand Down
27 changes: 19 additions & 8 deletions core/src/main/java/org/elasticsearch/index/analysis/Analysis.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,10 @@
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
Expand Down Expand Up @@ -163,7 +165,8 @@ public static CharArraySet parseStemExclusion(Settings settings, CharArraySet de
NAMED_STOP_WORDS = unmodifiableMap(namedStopWords);
}

public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords, Map<String, Set<?>> namedWords, boolean ignoreCase) {
public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords,
Map<String, Set<?>> namedWords, boolean ignoreCase) {
String value = settings.get(name);
if (value != null) {
if ("_none_".equals(value)) {
Expand Down Expand Up @@ -237,12 +240,17 @@ public static List<String> getWordList(Environment env, Settings settings, Strin
}
}

final Path wordListFile = env.configFile().resolve(wordListPath);
final Path path = env.configFile().resolve(wordListPath);

try (BufferedReader reader = FileSystemUtils.newBufferedReader(wordListFile.toUri().toURL(), StandardCharsets.UTF_8)) {
try (BufferedReader reader = FileSystemUtils.newBufferedReader(path.toUri().toURL(), StandardCharsets.UTF_8)) {
return loadWordList(reader, "#");
} catch (CharacterCodingException ex) {
String message = String.format(Locale.ROOT,
"Unsupported character encoding detected while reading %s_path: %s - files must be UTF-8 encoded",
settingPrefix, path.toString());
throw new IllegalArgumentException(message, ex);
} catch (IOException ioe) {
String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix);
String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, path.toString());
throw new IllegalArgumentException(message, ioe);
}
}
Expand All @@ -256,7 +264,7 @@ public static List<String> loadWordList(Reader reader, String comment) throws IO
} else {
br = new BufferedReader(reader);
}
String word = null;
String word;
while ((word = br.readLine()) != null) {
if (!Strings.hasText(word)) {
continue;
Expand All @@ -283,13 +291,16 @@ public static Reader getReaderFromFile(Environment env, Settings settings, Strin
if (filePath == null) {
return null;
}

final Path path = env.configFile().resolve(filePath);

try {
return FileSystemUtils.newBufferedReader(path.toUri().toURL(), StandardCharsets.UTF_8);
} catch (CharacterCodingException ex) {
String message = String.format(Locale.ROOT,
"Unsupported character encoding detected while reading %s_path: %s files must be UTF-8 encoded",
settingPrefix, path.toString());
throw new IllegalArgumentException(message, ex);
} catch (IOException ioe) {
String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix);
String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, path.toString());
throw new IllegalArgumentException(message, ioe);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,23 @@

import org.apache.lucene.analysis.util.CharArraySet;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.test.ESTestCase;

import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.MalformedInputException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;

import static org.elasticsearch.common.settings.Settings.settingsBuilder;
import static org.hamcrest.Matchers.is;

Expand All @@ -42,4 +57,55 @@ public void testParseStemExclusion() {
assertThat(set.contains("bar"), is(true));
assertThat(set.contains("baz"), is(false));
}

public void testParseNonExistingFile() {
Path tempDir = createTempDir();
Settings nodeSettings = Settings.builder()
.put("foo.bar_path", tempDir.resolve("foo.dict"))
.put(Environment.PATH_HOME_SETTING.getKey(), tempDir).build();
Environment env = new Environment(nodeSettings);
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
() -> Analysis.getWordList(env, nodeSettings, "foo.bar"));
assertEquals("IOException while reading foo.bar_path: " + tempDir.resolve("foo.dict").toString(), ex.getMessage());
assertTrue(ex.getCause().toString(), ex.getCause() instanceof FileNotFoundException
|| ex.getCause() instanceof NoSuchFileException);
}


public void testParseFalseEncodedFile() throws IOException {
Path tempDir = createTempDir();
Path dict = tempDir.resolve("foo.dict");
Settings nodeSettings = Settings.builder()
.put("foo.bar_path", dict)
.put(Environment.PATH_HOME_SETTING.getKey(), tempDir).build();
try (OutputStream writer = Files.newOutputStream(dict)) {
writer.write(new byte[]{(byte) 0xff, 0x00, 0x00}); // some invalid UTF-8
writer.write('\n');
}
Environment env = new Environment(nodeSettings);
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
() -> Analysis.getWordList(env, nodeSettings, "foo.bar"));
assertEquals("Unsupported character encoding detected while reading foo.bar_path: " + tempDir.resolve("foo.dict").toString()
+ " - files must be UTF-8 encoded" , ex.getMessage());
assertTrue(ex.getCause().toString(), ex.getCause() instanceof MalformedInputException
|| ex.getCause() instanceof CharacterCodingException);
}

public void testParseWordList() throws IOException {
Path tempDir = createTempDir();
Path dict = tempDir.resolve("foo.dict");
Settings nodeSettings = Settings.builder()
.put("foo.bar_path", dict)
.put(Environment.PATH_HOME_SETTING.getKey(), tempDir).build();
try (BufferedWriter writer = Files.newBufferedWriter(dict, StandardCharsets.UTF_8)) {
writer.write("hello");
writer.write('\n');
writer.write("world");
writer.write('\n');
}
Environment env = new Environment(nodeSettings);
List<String> wordList = Analysis.getWordList(env, nodeSettings, "foo.bar");
assertEquals(Arrays.asList("hello", "world"), wordList);

}
}

0 comments on commit 75d5b83

Please sign in to comment.