From cb55c961bbb74b30b32544035f00cdb6d67f938e Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 11 Jun 2019 09:23:15 +0100 Subject: [PATCH 1/6] Require [articles] setting in elision filter --- .../common/ElisionTokenFilterFactory.java | 3 +++ .../common/ElisionFilterFactoryTests.java | 24 +++++++++++++++++++ .../test/analysis-common/40_token_filters.yml | 14 +++++++++++ 3 files changed, 41 insertions(+) create mode 100644 modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java index 52cb69952b836..b1e01bca92731 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java @@ -35,6 +35,9 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implem ElisionTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); + if (settings.get("articles") == null) { + throw new IllegalArgumentException("elision filter requires [articles] setting"); + } this.articles = Analysis.parseArticles(env, settings); } diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java new file mode 100644 index 0000000000000..ca64665f71c93 --- /dev/null +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java @@ -0,0 +1,24 @@ +package org.elasticsearch.analysis.common; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.analysis.AnalysisTestsHelper; +import org.elasticsearch.test.ESTokenStreamTestCase; + +import java.io.IOException; + +public class ElisionFilterFactoryTests extends ESTokenStreamTestCase { + + public void testElisionFilterWithNoArticles() throws IOException { + Settings settings = Settings.builder() + .put("index.analysis.filter.elision.type", "elision") + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .build(); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin())); + + assertEquals("elision filter requires [articles] setting", e.getMessage()); + } + +} diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml index 3486b9defd9d2..d679631017c9a 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml @@ -587,6 +587,20 @@ - length: { tokens: 1 } - match: { tokens.0.token: avion } + - do: + catch: bad_request + indices.create: + index: test2 + body: + settings: + analysis: + filter: + my_elision: + type: elision + - match: { status: 400 } + - match: { error.type: illegal_argument_exception } + - match: { error.reason: "elision filter requires [articles] setting" } + --- "stemmer": - do: From d9ec212e1c4e6874ab1a21ce2302f1a9ed9d2ac3 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 11 Jun 2019 09:37:03 +0100 Subject: [PATCH 2/6] license text --- .../common/ElisionFilterFactoryTests.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java index ca64665f71c93..d91cc2631b25a 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java @@ -1,3 +1,22 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.elasticsearch.analysis.common; import org.elasticsearch.common.settings.Settings; From 489870e323916b437c829860977452e764e2d9d1 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 24 Jun 2019 12:39:10 +0100 Subject: [PATCH 3/6] Define elision filter as 'requiresAnalysisSettings' --- .../org/elasticsearch/analysis/common/CommonAnalysisPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index f095b766ee1d5..ca53cb8bf3953 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -237,7 +237,7 @@ public Map> getTokenFilters() { filters.put("dutch_stem", DutchStemTokenFilterFactory::new); filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new); filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new); - filters.put("elision", ElisionTokenFilterFactory::new); + filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new)); filters.put("fingerprint", FingerprintTokenFilterFactory::new); filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new); filters.put("french_stem", FrenchStemTokenFilterFactory::new); From a6a6a3ecefe96e7b6bbd392968451990cec1c53f Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 26 Jun 2019 16:44:39 +0100 Subject: [PATCH 4/6] Handle articles_path --- .../analysis/tokenfilters/elision-tokenfilter.asciidoc | 5 +++-- .../analysis/common/ElisionTokenFilterFactory.java | 4 ++-- .../analysis/common/ElisionFilterFactoryTests.java | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc index 2ff19cebe893e..34646a0413e36 100644 --- a/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc @@ -4,8 +4,9 @@ A token filter which removes elisions. For example, "l'avion" (the plane) will tokenized as "avion" (plane). -Accepts `articles` parameter which is a set of stop words articles. Also accepts -`articles_case`, which indicates whether the filter treats those articles as +Requires either an `articles` parameter which is a set of stop word articles, or +`articles_path` which points to a text file containing the stop set. Also optionally +accepts `articles_case`, which indicates whether the filter treats those articles as case sensitive. For example: diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java index b1e01bca92731..4ab8883794338 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java @@ -35,8 +35,8 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implem ElisionTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); - if (settings.get("articles") == null) { - throw new IllegalArgumentException("elision filter requires [articles] setting"); + if (settings.get("articles") == null && settings.get("articles_path") == null) { + throw new IllegalArgumentException("elision filter requires [articles] or [articles_path] setting"); } this.articles = Analysis.parseArticles(env, settings); } diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java index d91cc2631b25a..dbfd49d5649d5 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ElisionFilterFactoryTests.java @@ -37,7 +37,7 @@ public void testElisionFilterWithNoArticles() throws IOException { IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin())); - assertEquals("elision filter requires [articles] setting", e.getMessage()); + assertEquals("elision filter requires [articles] or [articles_path] setting", e.getMessage()); } } From 756d94930190bda5d04e50646ca7131cdee40793 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 26 Jun 2019 17:12:40 +0100 Subject: [PATCH 5/6] feedback --- .../analysis/common/ElisionTokenFilterFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java index 4ab8883794338..39d042caa8c25 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ElisionTokenFilterFactory.java @@ -35,10 +35,10 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implem ElisionTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); - if (settings.get("articles") == null && settings.get("articles_path") == null) { + this.articles = Analysis.parseArticles(env, settings); + if (this.articles == null) { throw new IllegalArgumentException("elision filter requires [articles] or [articles_path] setting"); } - this.articles = Analysis.parseArticles(env, settings); } @Override From a202ab634cf018c9ad2ab8542b5a9accfa583e9c Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 26 Jun 2019 18:22:16 +0100 Subject: [PATCH 6/6] test --- .../rest-api-spec/test/analysis-common/40_token_filters.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml index d679631017c9a..92d0dce7b6201 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml @@ -599,7 +599,7 @@ type: elision - match: { status: 400 } - match: { error.type: illegal_argument_exception } - - match: { error.reason: "elision filter requires [articles] setting" } + - match: { error.reason: "elision filter requires [articles] or [articles_path] setting" } --- "stemmer":