Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Use preconfigured filters correctly in Analyze API #43568

Merged
merged 9 commits into from
Jun 27, 2019
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ private static Settings getSettingsFromIndexSettings(IndexSettings indexSettings
private <T> T getComponentFactory(IndexSettings settings, NameOrDefinition nod,
String componentType,
Function<String, AnalysisProvider<T>> globalComponentProvider,
Function<String, AnalysisProvider<T>> prebuiltComponentProvider,
BiFunction<String, IndexSettings, AnalysisProvider<T>> indexComponentProvider) throws IOException {
if (nod.definition != null) {
// custom component, so we build it from scratch
Expand All @@ -128,10 +129,14 @@ private <T> T getComponentFactory(IndexSettings settings, NameOrDefinition nod,
return factory.get(settings, environment, "__anonymous__" + type, nod.definition);
}
if (settings == null) {
// no index provided, so we use global analysis components only
AnalysisProvider<T> factory = globalComponentProvider.apply(nod.name);
// no index provided, so we use prebuilt analysis components
AnalysisProvider<T> factory = prebuiltComponentProvider.apply(nod.name);
if (factory == null) {
throw new IllegalArgumentException("failed to find global " + componentType + " under [" + nod.name + "]");
// if there's no prebuilt component, try loading a global one to build with no settings
factory = globalComponentProvider.apply(nod.name);
if (factory == null) {
throw new IllegalArgumentException("failed to find global " + componentType + " under [" + nod.name + "]");
}
}
return factory.get(environment, nod.name);
} else {
Expand Down Expand Up @@ -217,25 +222,26 @@ public IndexAnalyzers build(IndexSettings indexSettings) throws IOException {
public NamedAnalyzer buildCustomAnalyzer(IndexSettings indexSettings, boolean normalizer, NameOrDefinition tokenizer,
List<NameOrDefinition> charFilters, List<NameOrDefinition> tokenFilters) throws IOException {
TokenizerFactory tokenizerFactory
= getComponentFactory(indexSettings, tokenizer, "tokenizer", this::getTokenizerProvider, this::getTokenizerProvider);
= getComponentFactory(indexSettings, tokenizer, "tokenizer",
this::getTokenizerProvider, prebuiltAnalysis::getTokenizerFactory, this::getTokenizerProvider);

List<CharFilterFactory> charFilterFactories = new ArrayList<>();
for (NameOrDefinition nod : charFilters) {
charFilterFactories.add(getComponentFactory(indexSettings, nod, "char_filter",
this::getCharFilterProvider, this::getCharFilterProvider));
this::getCharFilterProvider, prebuiltAnalysis::getCharFilterFactory, this::getCharFilterProvider));
}

List<TokenFilterFactory> tokenFilterFactories = new ArrayList<>();
for (NameOrDefinition nod : tokenFilters) {
TokenFilterFactory tff = getComponentFactory(indexSettings, nod, "filter",
this::getTokenFilterProvider, this::getTokenFilterProvider);
this::getTokenFilterProvider, prebuiltAnalysis::getTokenFilterFactory, this::getTokenFilterProvider);
if (normalizer && tff instanceof NormalizingTokenFilterFactory == false) {
throw new IllegalArgumentException("Custom normalizer may not use filter [" + tff.name() + "]");
}
tff = tff.getChainAwareTokenFilterFactory(tokenizerFactory, charFilterFactories, tokenFilterFactories, name -> {
try {
return getComponentFactory(indexSettings, new NameOrDefinition(name), "filter",
this::getTokenFilterProvider, this::getTokenFilterProvider);
this::getTokenFilterProvider, prebuiltAnalysis::getTokenFilterFactory, this::getTokenFilterProvider);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {

@Override
public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
return singletonList(PreConfiguredCharFilter.singleton("append_foo", false, reader -> new AppendCharFilter(reader, "foo")));
return singletonList(PreConfiguredCharFilter.singleton("append", false, reader -> new AppendCharFilter(reader, "foo")));
}
};
registry = new AnalysisModule(environment, singletonList(plugin)).getAnalysisRegistry();
Expand Down Expand Up @@ -170,24 +170,11 @@ public void testNoIndexAnalyzers() throws IOException {
List<AnalyzeAction.AnalyzeToken> tokens = analyze.getTokens();
assertEquals(4, tokens.size());

// Refer to a token filter by its type so we get its default configuration
request = new AnalyzeAction.Request();
request.text("the qu1ck brown fox");
request.tokenizer("standard");
request.addTokenFilter("mock");
analyze
= TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount);
tokens = analyze.getTokens();
assertEquals(3, tokens.size());
assertEquals("qu1ck", tokens.get(0).getTerm());
assertEquals("brown", tokens.get(1).getTerm());
assertEquals("fox", tokens.get(2).getTerm());

// We can refer to a pre-configured token filter by its name to get it
request = new AnalyzeAction.Request();
request.text("the qu1ck brown fox");
request.tokenizer("standard");
request.addCharFilter("append_foo");
request.addCharFilter("append"); // <-- no config, so use preconfigured filter
analyze
= TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount);
tokens = analyze.getTokens();
Expand All @@ -197,35 +184,46 @@ public void testNoIndexAnalyzers() throws IOException {
assertEquals("brown", tokens.get(2).getTerm());
assertEquals("foxfoo", tokens.get(3).getTerm());

// We can refer to a token filter by its type to get its default configuration
// If the preconfigured filter doesn't exist, we use a global filter with no settings
request = new AnalyzeAction.Request();
request.text("the qu1ck brown fox");
request.tokenizer("standard");
request.addCharFilter("append");
request.addTokenFilter("mock"); // <-- not preconfigured, but a global one available
analyze
= TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount);
tokens = analyze.getTokens();
assertEquals(3, tokens.size());
assertEquals("qu1ck", tokens.get(0).getTerm());
assertEquals("brown", tokens.get(1).getTerm());
assertEquals("fox", tokens.get(2).getTerm());

// We can build a new char filter to get default values
request = new AnalyzeAction.Request();
request.text("the qu1ck brown fox");
request.tokenizer("standard");
request.addTokenFilter(Map.of("type", "mock", "stopword", "brown"));
request.addCharFilter(Map.of("type", "append")); // <-- basic config, uses defaults
analyze
= TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount);
tokens = analyze.getTokens();
assertEquals(4, tokens.size());
assertEquals(3, tokens.size());
assertEquals("the", tokens.get(0).getTerm());
assertEquals("qu1ck", tokens.get(1).getTerm());
assertEquals("brown", tokens.get(2).getTerm());
assertEquals("foxbar", tokens.get(3).getTerm());
assertEquals("foxbar", tokens.get(2).getTerm());

// We can pass a new configuration
request = new AnalyzeAction.Request();
request.text("the qu1ck brown fox");
request.tokenizer("standard");
request.addTokenFilter(Map.of("type", "mock", "stopword", "brown"));
request.addCharFilter("append");
request.text("the qu1ck brown fox");
request.addCharFilter(Map.of("type", "append", "suffix", "baz"));
analyze
= TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount);
tokens = analyze.getTokens();
assertEquals(3, tokens.size());
assertEquals("the", tokens.get(0).getTerm());
assertEquals("qu1ck", tokens.get(1).getTerm());
assertEquals("foxbar", tokens.get(2).getTerm());
assertEquals("foxbaz", tokens.get(2).getTerm());
}

public void testFillsAttributes() throws IOException {
Expand Down