Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable reloading of search time analyzers #40782

Closed
wants to merge 33 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
750f676
WIP
Apr 2, 2019
99a5e4a
Merge branch 'master' into updateable-synonyms
Apr 15, 2019
2a7951b
adressing comments
Apr 15, 2019
07b8156
Access analysis providers via IndexAnalyzers
Apr 15, 2019
6a5afce
Merge branch 'master' into updateable-synonyms
Apr 15, 2019
095f140
Merge branch 'master' into updateable-synonyms
Apr 16, 2019
3de3ec5
Improve reloading token filters
Apr 17, 2019
5daca94
Adding AnalysisRegistry unit tests
Apr 18, 2019
1ba86f0
Merge branch 'master' into updateable-synonyms
Apr 23, 2019
97f1c58
Fix small compile issue
Apr 23, 2019
9aa1f28
Add unit test for AnalysisRegistry#reloadIndexAnalyzers
Apr 23, 2019
abb6523
More tests
Apr 23, 2019
3f02fa5
Merge branch 'master' into updateable-synonyms
Apr 24, 2019
66c7bfa
Adding ReloadAnalyzerAction
Apr 24, 2019
0ea6def
iter
Apr 26, 2019
afe579d
Merge branch 'master' into updateable-synonyms
May 9, 2019
d41b1ef
Merge branch 'master' into updateable-synonyms
May 21, 2019
c9ebe8d
Make CustomAnalyzer reloadable
jimczi May 21, 2019
4aadbdd
Removing unused code after switch to reloading analyzer components
May 22, 2019
aa4d19b
Merge branch 'master' into updateable-synonyms
May 22, 2019
d486308
Removing more stuff from IndexAnalyzers
May 22, 2019
8084dc3
Merge branch 'master' into updateable-synonyms
May 22, 2019
66091a2
Some more cleanups
May 22, 2019
b4fb376
Move getters into AnalyzerComponents
May 22, 2019
e6d7fd8
Merge branch 'master' into updateable-synonyms
May 23, 2019
ac82f93
Fix SynonymAnalyzerIT so it runs in gradle
May 23, 2019
833b3a2
Merge branch 'master' into updateable-synonyms
May 23, 2019
5efac3d
iter
May 23, 2019
0ec5ac9
Merge branch 'master' into updateable-synonyms
May 28, 2019
03fa799
Make ReloadableCustomAnalyzer sibling of CustomAnalyzer rather than e…
May 28, 2019
6d12084
Pull positionIncrement and offsetGap out of AnalyzerComponents
May 28, 2019
9a35f6a
Merge branch 'master' into updateable-synonyms
May 29, 2019
4affb0b
iter
May 29, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.analysis.AnalysisMode;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
Expand All @@ -50,6 +51,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
private final boolean lenient;
protected final Settings settings;
protected final Environment environment;
private final boolean updateable;

SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
Expand All @@ -65,9 +67,15 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
this.expand = settings.getAsBoolean("expand", true);
this.lenient = settings.getAsBoolean("lenient", false);
this.format = settings.get("format", "");
this.updateable = settings.getAsBoolean("updateable", false);
this.environment = env;
}

@Override
public AnalysisMode getAnalysisMode() {
return this.updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
}

@Override
public TokenStream create(TokenStream tokenStream) {
throw new IllegalStateException("Call createPerAnalyzerSynonymFactory to specialize this factory for an analysis chain first");
Expand Down Expand Up @@ -98,6 +106,11 @@ public TokenFilterFactory getSynonymFilter() {
// which doesn't support stacked input tokens
return IDENTITY_FILTER;
}

@Override
public AnalysisMode getAnalysisMode() {
return updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
}
};
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
import org.elasticsearch.action.admin.indices.reloadanalyzer.ReloadAnalyzersResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalTestCluster;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;

public class SynonymAnalyzerIT extends ESIntegTestCase {

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(CommonAnalysisPlugin.class);
}

/**
* This test needs to write to the config directory, this is difficult in an external cluster so we overwrite this to force running with
* {@link InternalTestCluster}
*/
@Override
protected boolean ignoreExternalCluster() {
return true;
}

public void testSynonymsUpdateable() throws FileNotFoundException, IOException, InterruptedException {
Path config = internalCluster().getInstance(Environment.class).configFile();
String synonymsFileName = "synonyms.txt";
Path synonymsFile = config.resolve(synonymsFileName);
Files.createFile(synonymsFile);
assertTrue(Files.exists(synonymsFile));
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.CREATE), StandardCharsets.UTF_8))) {
out.println("foo, baz");
}
assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
.put("index.number_of_shards", cluster().numDataNodes() * 2)
.put("index.number_of_replicas", 1)
.put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
.put("analysis.analyzer.my_synonym_analyzer.filter", "my_synonym_filter")
.put("analysis.filter.my_synonym_filter.type", "synonym")
.put("analysis.filter.my_synonym_filter.updateable", "true")
.put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=my_synonym_analyzer"));

client().prepareIndex("test", "_doc", "1").setSource("field", "foo").get();
assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());

SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 0L);
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(2, analyzeResponse.getTokens().size());
assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm());
assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm());

// now update synonyms file several times and trigger reloading
for (int i = 0; i < 10; i++) {
String testTerm = randomAlphaOfLength(10);
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz, " + testTerm);
}
ReloadAnalyzersResponse reloadResponse = client().admin().indices().prepareReloadAnalyzers("test").execute().actionGet();
assertNoFailures(reloadResponse);
assertEquals(cluster().numDataNodes(), reloadResponse.getSuccessfulShards());

analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(3, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));
assertTrue(tokens.contains(testTerm));

response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", testTerm)).get();
assertHitCount(response, 1L);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;

public class SynonymAnalyzerTests extends ESSingleNodeTestCase {

@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
return Arrays.asList(CommonAnalysisPlugin.class);
}

public void testSynonymsUpdateable() throws FileNotFoundException, IOException {
String synonymsFileName = "synonyms.txt";
Path configDir = node().getEnvironment().configFile();
if (Files.exists(configDir) == false) {
Files.createDirectory(configDir);
}
Path synonymsFile = configDir.resolve(synonymsFileName);
if (Files.exists(synonymsFile) == false) {
Files.createFile(synonymsFile);
}
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz");
}

assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
.putList("analysis.analyzer.my_synonym_analyzer.filter", "lowercase", "my_synonym_filter")
.put("analysis.filter.my_synonym_filter.type", "synonym")
.put("analysis.filter.my_synonym_filter.updateable", "true")
.put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=my_synonym_analyzer"));

client().prepareIndex("test", "_doc", "1").setSource("field", "Foo").get();
assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());

SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 0L);
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(2, analyzeResponse.getTokens().size());
assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm());
assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm());

// now update synonyms file and trigger reloading
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz, buzz");
}
assertNoFailures(client().admin().indices().prepareReloadAnalyzers("test").execute().actionGet());

analyzeResponse = client().admin().indices().prepareAnalyze("test", "Foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(3, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));
assertTrue(tokens.contains("buzz"));

response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 1L);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@
import org.elasticsearch.action.admin.indices.recovery.TransportRecoveryAction;
import org.elasticsearch.action.admin.indices.refresh.RefreshAction;
import org.elasticsearch.action.admin.indices.refresh.TransportRefreshAction;
import org.elasticsearch.action.admin.indices.reloadanalyzer.ReloadAnalyzerAction;
import org.elasticsearch.action.admin.indices.reloadanalyzer.TransportReloadAnalyzersAction;
import org.elasticsearch.action.admin.indices.rollover.RolloverAction;
import org.elasticsearch.action.admin.indices.rollover.TransportRolloverAction;
import org.elasticsearch.action.admin.indices.segments.IndicesSegmentsAction;
Expand Down Expand Up @@ -509,6 +511,7 @@ public <Request extends ActionRequest, Response extends ActionResponse> void reg
actions.register(ClearScrollAction.INSTANCE, TransportClearScrollAction.class);
actions.register(RecoveryAction.INSTANCE, TransportRecoveryAction.class);
actions.register(NodesReloadSecureSettingsAction.INSTANCE, TransportNodesReloadSecureSettingsAction.class);
actions.register(ReloadAnalyzerAction.INSTANCE, TransportReloadAnalyzersAction.class);

//Indexed scripts
actions.register(PutStoredScriptAction.INSTANCE, TransportPutStoredScriptAction.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,12 @@
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.CustomAnalyzerProvider.AnalyzerComponents;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
import org.elasticsearch.index.analysis.ReloadableCustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
Expand Down Expand Up @@ -299,18 +301,36 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy
}
}

CustomAnalyzer customAnalyzer = null;
if (analyzer instanceof CustomAnalyzer) {
customAnalyzer = (CustomAnalyzer) analyzer;
} else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
Analyzer customAnalyzer = null;
// maybe unwrap analyzer from NamedAnalyzer
Analyzer potentialCustomAnalyzer = analyzer;
if (analyzer instanceof NamedAnalyzer) {
potentialCustomAnalyzer = ((NamedAnalyzer) analyzer).analyzer();
}
if (potentialCustomAnalyzer instanceof CustomAnalyzer || potentialCustomAnalyzer instanceof ReloadableCustomAnalyzer) {
customAnalyzer = potentialCustomAnalyzer;
}

if (customAnalyzer != null) {
// customAnalyzer = divide charfilter, tokenizer tokenfilters
CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
// divide charfilter, tokenizer tokenfilters
CharFilterFactory[] charFilterFactories;
TokenizerFactory tokenizerFactory;
TokenFilterFactory[] tokenFilterFactories;
String tokenizerName;
if (customAnalyzer instanceof CustomAnalyzer) {
CustomAnalyzer casted = (CustomAnalyzer) analyzer;
charFilterFactories = casted.charFilters();
tokenizerFactory = casted.tokenizerFactory();
tokenFilterFactories = casted.tokenFilters();
tokenizerName = casted.getTokenizerName();
} else {
// for ReloadableCustomAnalyzer we want to make sure we get the factories from the same components object
AnalyzerComponents components = ((ReloadableCustomAnalyzer) customAnalyzer).getComponents();
charFilterFactories = components.getCharFilters();
tokenizerFactory = components.getTokenizerFactory();
tokenFilterFactories = components.getTokenFilters();
tokenizerName = components.getTokenizerName();
}

String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ?
Expand Down Expand Up @@ -370,7 +390,7 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy
}
}
detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(
customAnalyzer.getTokenizerName(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
tokenizerName, tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
} else {
String name;
if (analyzer instanceof NamedAnalyzer) {
Expand Down
Loading