WIP
Christoph Büscher committed Apr 8, 2019
1 parent c95a95b commit 750f676
Showing 8 changed files with 456 additions and 62 deletions.
@@ -30,6 +30,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.analysis.AnalysisMode;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
@@ -50,6 +51,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
private final boolean lenient;
protected final Settings settings;
protected final Environment environment;
private final boolean updateable;

SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
@@ -65,9 +67,15 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
this.expand = settings.getAsBoolean("expand", true);
this.lenient = settings.getAsBoolean("lenient", false);
this.format = settings.get("format", "");
this.updateable = settings.getAsBoolean("updateable", false);
this.environment = env;
}

@Override
public AnalysisMode getAnalysisMode() {
return this.updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
}

@Override
public TokenStream create(TokenStream tokenStream) {
throw new IllegalStateException("Call createPerAnalyzerSynonymFactory to specialize this factory for an analysis chain first");
@@ -98,6 +106,11 @@ public TokenFilterFactory getSynonymFilter() {
// which doesn't support stacked input tokens
return IDENTITY_FILTER;
}

@Override
public AnalysisMode getAnalysisMode() {
return updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
}
};
}
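
The new "updateable" flag above switches the filter (and the per-analyzer factory returned by getSynonymFilter()) to AnalysisMode.SEARCH_TIME, which is what allows its synonyms to be reloaded without reindexing. A minimal sketch of the index settings the flag expects, mirroring the tests added below (analyzer and filter names are taken from those tests, not fixed by this change):

import org.elasticsearch.common.settings.Settings;

Settings indexSettings = Settings.builder()
        .put("index.number_of_shards", 1)
        .put("index.number_of_replicas", 0)
        .put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
        .putList("analysis.analyzer.my_synonym_analyzer.filter", "lowercase", "my_synonym_filter")
        .put("analysis.filter.my_synonym_filter.type", "synonym")
        // new in this commit: marks the filter, and any analyzer using it, as search-time only
        .put("analysis.filter.my_synonym_filter.updateable", "true")
        .put("analysis.filter.my_synonym_filter.synonyms_path", "synonyms.txt")
        .build();

Because the filter reports AnalysisMode.SEARCH_TIME, an analyzer containing it should only be referenced as a search_analyzer (as in the test mappings below), not as the index-time analyzer.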

@@ -0,0 +1,127 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.junit.BeforeClass;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;

@AwaitsFix(bugUrl="Cannot be run outside IDE yet")
public class SynonymAnalyzerIT extends ESIntegTestCase {

private static Path config;
private static Path synonymsFile;
private static final String synonymsFileName = "synonyms.txt";

@BeforeClass
public static void initConfigDir() throws IOException {
config = createTempDir().resolve("config");
if (Files.exists(config) == false) {
Files.createDirectory(config);
}
synonymsFile = config.resolve(synonymsFileName);
Files.createFile(synonymsFile);
assertTrue(Files.exists(synonymsFile));
}


@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(CommonAnalysisPlugin.class);
}

@Override
protected Path nodeConfigPath(int nodeOrdinal) {
return config;
}

public void testSynonymsUpdateable() throws FileNotFoundException, IOException {
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.CREATE), StandardCharsets.UTF_8))) {
out.println("foo, baz");
}
assertTrue(Files.exists(synonymsFile));
assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
.put("index.number_of_shards", 1)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
.put("analysis.analyzer.my_synonym_analyzer.filter", "my_synonym_filter")
.put("analysis.filter.my_synonym_filter.type", "synonym")
.put("analysis.filter.my_synonym_filter.updateable", "true")
.put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=my_synonym_analyzer"));

client().prepareIndex("test", "_doc", "1").setSource("field", "foo").get();
assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());

SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 0L);
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(2, analyzeResponse.getTokens().size());
assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm());
assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm());

// now update synonyms file and trigger reloading
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz, buzz");
}
// TODO don't use refresh here but something more specific
assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());

analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(3, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(tokens::add);
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));
assertTrue(tokens.contains("buzz"));

response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 1L);
}
}
@@ -0,0 +1,112 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;

public class SynonymAnalyzerTests extends ESSingleNodeTestCase {

@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
return Arrays.asList(CommonAnalysisPlugin.class);
}

public void testSynonymsUpdateable() throws FileNotFoundException, IOException {
String synonymsFileName = "synonyms.txt";
Path configDir = node().getEnvironment().configFile();
if (Files.exists(configDir) == false) {
Files.createDirectory(configDir);
}
Path synonymsFile = configDir.resolve(synonymsFileName);
if (Files.exists(synonymsFile) == false) {
Files.createFile(synonymsFile);
}
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz");
}

assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
.put("index.number_of_shards", 1)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
.putList("analysis.analyzer.my_synonym_analyzer.filter", "lowercase", "my_synonym_filter")
.put("analysis.filter.my_synonym_filter.type", "synonym")
.put("analysis.filter.my_synonym_filter.updateable", "true")
.put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=my_synonym_analyzer"));

client().prepareIndex("test", "_doc", "1").setSource("field", "Foo").get();
assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());

SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 0L);
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(2, analyzeResponse.getTokens().size());
assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm());
assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm());

// now update synonyms file and trigger reloading
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz, buzz");
}
// TODO don't use refresh here but something more specific
assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());

analyzeResponse = client().admin().indices().prepareAnalyze("test", "Foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(3, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(tokens::add);
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));
assertTrue(tokens.contains("buzz"));

response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 1L);
}
}
@@ -58,6 +58,12 @@ protected void shardOperationOnPrimary(BasicReplicationRequest shardRequest, Ind
ActionListener<PrimaryResult<BasicReplicationRequest, ReplicationResponse>> listener) {
ActionListener.completeWith(listener, () -> {
primary.refresh("api");
try {
primary.mapperService().reloadSearchAnalyzers(indicesService.getAnalysis());
} catch (Exception ex) {
logger.error(ex.getLocalizedMessage(), ex);
return new PrimaryResult<>(null, null, ex);
}
logger.trace("{} refresh request executed on primary", primary.shardId());
return new PrimaryResult<>(shardRequest, new ReplicationResponse());
});
@@ -66,6 +72,12 @@ protected void shardOperationOnPrimary(BasicReplicationRequest shardRequest, Ind
@Override
protected ReplicaResult shardOperationOnReplica(BasicReplicationRequest request, IndexShard replica) {
replica.refresh("api");
try {
replica.mapperService().reloadSearchAnalyzers(indicesService.getAnalysis());
} catch (Exception ex) {
logger.error(ex.getLocalizedMessage(), ex);
return new ReplicaResult(ex);
}
logger.trace("{} refresh request executed on replica", replica.shardId());
return new ReplicaResult();
}
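
The two hunks above hook search-analyzer reloading into the existing shard refresh action on both the primary and replica paths, turning a reload error into an operation failure. A hedged sketch of how a caller would exercise this through the refresh API (index and analyzer names reused from the tests in this commit; a dedicated reload endpoint does not exist yet, per the TODO in the tests):

import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.refresh.RefreshResponse;

// after editing the synonyms file on disk, a refresh now also reloads
// updateable search-time analyzers on primaries and replicas
RefreshResponse refresh = client().admin().indices().prepareRefresh("test").get();
// a failed analyzer reload should surface as a shard failure on the broadcast response
assertEquals(0, refresh.getFailedShards());

// the analyze API should now reflect the updated synonyms
AnalyzeResponse analyzed = client().admin().indices()
        .prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();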

