Skip to content

Commit

Permalink
Check stemmer language setting early (#34601)
Browse files Browse the repository at this point in the history
Currently the StemmerTokenFilterFactory checks the validity of the language
setting only when the first TokenStream is processed. Instead we should throw an
error earlier at mapping creation time. This change adds a check to the
StemmerTokenFilterFactory constructor that checks for a valid `language` setting
by trying to create a new TokenStream from an empty input stream. This will
throw errors about wrong language settings early on.

Closes #34170
  • Loading branch information
Christoph Büscher authored and kcm committed Oct 30, 2018
1 parent cd07abf commit 86ae7dd
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.apache.lucene.analysis.id.IndonesianStemFilter;
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
import org.apache.lucene.analysis.lv.LatvianStemFilter;
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.no.NorwegianLightStemFilter;
import org.apache.lucene.analysis.no.NorwegianLightStemmer;
import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter;
Expand Down Expand Up @@ -82,13 +83,19 @@
import org.tartarus.snowball.ext.SwedishStemmer;
import org.tartarus.snowball.ext.TurkishStemmer;

import java.io.IOException;

public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {

private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream();

private String language;

/**
 * Creates the stemmer token filter factory and validates its language setting up front.
 *
 * The language is read from the "language" setting, falling back to the "name" setting and
 * then to "porter"; the result is stored capitalized. A filter is then created over an empty
 * token stream and closed right away, so that (per the commit description) an invalid
 * language is reported at construction time instead of when the first real TokenStream is
 * processed.
 *
 * @throws IOException declared on the new signature; the validation step creates and closes
 *                     a TokenStream
 */
// NOTE(review): the next two lines are the removed and the added constructor signature of
// this diff hunk; only the second one (with "throws IOException") belongs to the new code.
StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
super(indexSettings, name, settings);
// "language" takes precedence; "name" is the fallback key and "porter" the final default
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
// check that we have a valid language by trying to create a TokenStream
// (the stream wraps an empty input and is closed immediately; it exists only for this check)
create(EMPTY_TOKEN_STREAM).close();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ public void testEnglishFilterFactory() throws IOException {
assertThat(create, instanceOf(PorterStemFilter.class));
assertAnalyzesTo(analyzer, "consolingly", new String[]{"consolingli"});
}

}

public void testPorter2FilterFactory() throws IOException {
Expand Down Expand Up @@ -97,7 +96,16 @@ public void testPorter2FilterFactory() throws IOException {
assertThat(create, instanceOf(SnowballFilter.class));
assertAnalyzesTo(analyzer, "possibly", new String[]{"possibl"});
}

}

/**
 * Configures a "stemmer" filter whose "language" setting is a list of two values and checks
 * that building the test analysis fails with an IllegalArgumentException naming both values.
 */
public void testMultipleLanguagesThrowsException() throws IOException {
    Version createdVersion = VersionUtils.randomVersion(random());

    // Assemble the index settings step by step; the filter gets a list-valued language.
    Settings.Builder builder = Settings.builder();
    builder.put("index.analysis.filter.my_english.type", "stemmer");
    builder.putList("index.analysis.filter.my_english.language", "english", "light_english");
    builder.put(SETTING_VERSION_CREATED, createdVersion);
    builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString());
    Settings settings = builder.build();

    // The failure is expected while the analysis components are being created.
    IllegalArgumentException thrown = expectThrows(
        IllegalArgumentException.class,
        () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN));
    assertEquals("Invalid stemmer class specified: [english, light_english]", thrown.getMessage());
}
}

0 comments on commit 86ae7dd

Please sign in to comment.