From aeb2c3266d43dc1b424f4492e2c00652f72bcdf1 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 8 Jun 2018 08:58:46 +0200 Subject: [PATCH] Move a number of language analyzers to the analysis-common module (#31143) The following analyzers were moved from the server module to the analysis-common module: `snowball`, `arabic`, `armenian`, `basque`, `bengali`, `brazilian`, `bulgarian`, `catalan`, `chinese`, `cjk`, `czech`, `danish`, `dutch`, `english`, `finnish`, `french`, `galician` and `german`. Relates to #23658 --- .../common}/ArabicAnalyzerProvider.java | 6 +- .../common}/ArmenianAnalyzerProvider.java | 6 +- .../common}/BasqueAnalyzerProvider.java | 6 +- .../common}/BengaliAnalyzerProvider.java | 6 +- .../common}/BrazilianAnalyzerProvider.java | 6 +- .../common}/BulgarianAnalyzerProvider.java | 6 +- .../common}/CatalanAnalyzerProvider.java | 6 +- .../common}/ChineseAnalyzerProvider.java | 9 +- .../analysis/common}/CjkAnalyzerProvider.java | 6 +- .../analysis/common/CommonAnalysisPlugin.java | 140 ++++- .../common}/CzechAnalyzerProvider.java | 6 +- .../common}/DanishAnalyzerProvider.java | 6 +- .../common}/DutchAnalyzerProvider.java | 6 +- .../common}/EnglishAnalyzerProvider.java | 6 +- .../common}/FinnishAnalyzerProvider.java | 6 +- .../common}/FrenchAnalyzerProvider.java | 6 +- .../common}/GalicianAnalyzerProvider.java | 6 +- .../common}/GermanAnalyzerProvider.java | 6 +- .../analysis/common}/SnowballAnalyzer.java | 6 +- .../common}/SnowballAnalyzerProvider.java | 6 +- .../common}/SnowballAnalyzerTests.java | 10 +- .../test/analysis-common/20_analyzers.yml | 523 ++++++++++++++++++ .../test/search.query/40_query_string.yml | 58 ++ .../TokenCountFieldMapperIntegrationIT.java | 15 +- .../test/update_by_query/30_new_fields.yml | 7 +- .../test/count/20_query_string.yml | 8 - .../test/explain/30_query_string.yml | 10 - .../20_query_string.yml | 8 - .../test/search/60_query_string.yml | 8 - .../index/analysis/AnalysisRegistry.java | 2 +- 
.../indices/analysis/AnalysisModule.java | 36 -- .../indices/analysis/PreBuiltAnalyzers.java | 179 ------ .../index/analysis/PreBuiltAnalyzerTests.java | 19 +- .../index/mapper/TextFieldMapperTests.java | 40 +- .../query/QueryStringQueryBuilderTests.java | 4 +- .../query/SimpleQueryStringBuilderTests.java | 4 +- .../highlight/HighlighterSearchIT.java | 53 +- .../search/query/SimpleQueryStringIT.java | 61 +- .../validate/SimpleValidateQueryIT.java | 2 +- 39 files changed, 955 insertions(+), 349 deletions(-) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/ArabicAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/ArmenianAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/BasqueAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/BengaliAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/BrazilianAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/BulgarianAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/CatalanAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/ChineseAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => 
modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/CjkAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/CzechAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/DanishAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/DutchAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/EnglishAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/FinnishAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/FrenchAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/GalicianAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/GermanAnalyzerProvider.java (85%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/SnowballAnalyzer.java (95%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/SnowballAnalyzerProvider.java (92%) rename {server/src/test/java/org/elasticsearch/index/analysis => modules/analysis-common/src/test/java/org/elasticsearch/analysis/common}/SnowballAnalyzerTests.java (97%) create 
mode 100644 modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/40_query_string.yml diff --git a/server/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArabicAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArabicAnalyzerProvider.java index 10d8f22bde7e8..11e452ddae8db 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArabicAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final ArabicAnalyzer arabicAnalyzer; - public ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); arabicAnalyzer = new ArabicAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, ArabicAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArmenianAnalyzerProvider.java similarity 
index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArmenianAnalyzerProvider.java index 6c5193bbb773a..1e99a56979564 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArmenianAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.hy.ArmenianAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final ArmenianAnalyzer analyzer; - public ArmenianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + ArmenianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new ArmenianAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, ArmenianAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BasqueAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BasqueAnalyzerProvider.java index d55e3fdcba480..b28dec592309c 100644 --- 
a/server/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BasqueAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.eu.BasqueAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final BasqueAnalyzer analyzer; - public BasqueAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + BasqueAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new BasqueAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, BasqueAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/BengaliAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BengaliAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/BengaliAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BengaliAnalyzerProvider.java index ba11cde8fa190..8136ace4224f0 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/BengaliAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BengaliAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.bn.BengaliAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class BengaliAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final BengaliAnalyzer analyzer; - public BengaliAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + BengaliAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new BengaliAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, BengaliAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BrazilianAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BrazilianAnalyzerProvider.java index 7ca11542ac632..05f72a6c0793f 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BrazilianAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.br.BrazilianAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final BrazilianAnalyzer analyzer; - public BrazilianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + BrazilianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new BrazilianAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, BrazilianAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BulgarianAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BulgarianAnalyzerProvider.java index f64987d95e836..0463ddb3b0ef2 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BulgarianAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.bg.BulgarianAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final BulgarianAnalyzer analyzer; - public BulgarianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + BulgarianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new BulgarianAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, BulgarianAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CatalanAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CatalanAnalyzerProvider.java index ff0f9e323097d..591a352c7215c 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CatalanAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ca.CatalanAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final CatalanAnalyzer analyzer; - public CatalanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + CatalanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new CatalanAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, CatalanAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/ChineseAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ChineseAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/ChineseAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ChineseAnalyzerProvider.java index 10e6f0dc42f1e..01b529188c6f0 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/ChineseAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ChineseAnalyzerProvider.java @@ -17,12 +17,13 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; /** * Only for old indexes @@ -31,16 +32,16 @@ public class ChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final CJKAnalyzer analyzer; - public CjkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + CjkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); CharArraySet stopWords = Analysis.parseStopWords( env, indexSettings.getIndexVersionCreated(), settings, CJKAnalyzer.getDefaultStopSet()); diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 433bef902c1a1..24dce7abcf370 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -24,11 +24,17 @@ import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.ar.ArabicNormalizationFilter; import org.apache.lucene.analysis.ar.ArabicStemFilter; +import org.apache.lucene.analysis.bg.BulgarianAnalyzer; +import org.apache.lucene.analysis.bn.BengaliAnalyzer; import org.apache.lucene.analysis.bn.BengaliNormalizationFilter; +import org.apache.lucene.analysis.br.BrazilianAnalyzer; import org.apache.lucene.analysis.br.BrazilianStemFilter; 
+import org.apache.lucene.analysis.ca.CatalanAnalyzer; import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; +import org.apache.lucene.analysis.cjk.CJKAnalyzer; import org.apache.lucene.analysis.cjk.CJKBigramFilter; import org.apache.lucene.analysis.cjk.CJKWidthFilter; import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter; @@ -40,14 +46,22 @@ import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.UpperCaseFilter; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.cz.CzechAnalyzer; import org.apache.lucene.analysis.cz.CzechStemFilter; +import org.apache.lucene.analysis.da.DanishAnalyzer; +import org.apache.lucene.analysis.de.GermanAnalyzer; import org.apache.lucene.analysis.de.GermanNormalizationFilter; import org.apache.lucene.analysis.de.GermanStemFilter; +import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.en.KStemFilter; import org.apache.lucene.analysis.en.PorterStemFilter; +import org.apache.lucene.analysis.eu.BasqueAnalyzer; import org.apache.lucene.analysis.fa.PersianNormalizationFilter; +import org.apache.lucene.analysis.fi.FinnishAnalyzer; import org.apache.lucene.analysis.fr.FrenchAnalyzer; +import org.apache.lucene.analysis.gl.GalicianAnalyzer; import org.apache.lucene.analysis.hi.HindiNormalizationFilter; +import org.apache.lucene.analysis.hy.ArmenianAnalyzer; import org.apache.lucene.analysis.in.IndicNormalizationFilter; import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute; @@ -64,6 +78,7 @@ import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer; import org.apache.lucene.analysis.ngram.NGramTokenFilter; import org.apache.lucene.analysis.ngram.NGramTokenizer; +import org.apache.lucene.analysis.nl.DutchAnalyzer; import org.apache.lucene.analysis.path.PathHierarchyTokenizer; import 
org.apache.lucene.analysis.pattern.PatternTokenizer; import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter; @@ -73,6 +88,7 @@ import org.apache.lucene.analysis.snowball.SnowballFilter; import org.apache.lucene.analysis.standard.ClassicFilter; import org.apache.lucene.analysis.standard.ClassicTokenizer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; import org.apache.lucene.analysis.th.ThaiTokenizer; import org.apache.lucene.analysis.tr.ApostropheFilter; @@ -113,6 +129,24 @@ public Map>> getAn analyzers.put("fingerprint", FingerprintAnalyzerProvider::new); analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new); analyzers.put("pattern", PatternAnalyzerProvider::new); + analyzers.put("snowball", SnowballAnalyzerProvider::new); + analyzers.put("arabic", ArabicAnalyzerProvider::new); + analyzers.put("armenian", ArmenianAnalyzerProvider::new); + analyzers.put("basque", BasqueAnalyzerProvider::new); + analyzers.put("bengali", BengaliAnalyzerProvider::new); + analyzers.put("brazilian", BrazilianAnalyzerProvider::new); + analyzers.put("bulgarian", BulgarianAnalyzerProvider::new); + analyzers.put("catalan", CatalanAnalyzerProvider::new); + analyzers.put("chinese", ChineseAnalyzerProvider::new); + analyzers.put("cjk", CjkAnalyzerProvider::new); + analyzers.put("czech", CzechAnalyzerProvider::new); + analyzers.put("danish", DanishAnalyzerProvider::new); + analyzers.put("dutch", DutchAnalyzerProvider::new); + analyzers.put("english", EnglishAnalyzerProvider::new); + analyzers.put("finnish", FinnishAnalyzerProvider::new); + analyzers.put("french", FrenchAnalyzerProvider::new); + analyzers.put("galician", GalicianAnalyzerProvider::new); + analyzers.put("german", GermanAnalyzerProvider::new); return analyzers; } @@ -213,10 +247,108 @@ public Map> getTokenizers() { @Override public List getPreBuiltAnalyzerProviderFactories() { List analyzers = new ArrayList<>(); 
- analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE, - version -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET))); - analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version -> - new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, CharArraySet.EMPTY_SET))); + analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE, version -> { + Analyzer a = new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version -> { + Analyzer a = new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, + CharArraySet.EMPTY_SET); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("snowball", CachingStrategy.LUCENE, version -> { + Analyzer a = new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("arabic", CachingStrategy.LUCENE, version -> { + Analyzer a = new ArabicAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("armenian", CachingStrategy.LUCENE, version -> { + Analyzer a = new ArmenianAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("basque", CachingStrategy.LUCENE, version -> { + Analyzer a = new BasqueAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("bengali", CachingStrategy.LUCENE, version -> { + Analyzer a = new BengaliAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new 
PreBuiltAnalyzerProviderFactory("brazilian", CachingStrategy.LUCENE, version -> { + Analyzer a = new BrazilianAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("bulgarian", CachingStrategy.LUCENE, version -> { + Analyzer a = new BulgarianAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("catalan", CachingStrategy.LUCENE, version -> { + Analyzer a = new CatalanAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("chinese", CachingStrategy.LUCENE, version -> { + // only for old indices, best effort + Analyzer a = new StandardAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("cjk", CachingStrategy.LUCENE, version -> { + Analyzer a = new CJKAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("czech", CachingStrategy.LUCENE, version -> { + Analyzer a = new CzechAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("danish", CachingStrategy.LUCENE, version -> { + Analyzer a = new DanishAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("dutch", CachingStrategy.LUCENE, version -> { + Analyzer a = new DutchAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("english", CachingStrategy.LUCENE, version -> { + Analyzer a = new EnglishAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("finnish", CachingStrategy.LUCENE, version -> { + Analyzer a = new FinnishAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new 
PreBuiltAnalyzerProviderFactory("french", CachingStrategy.LUCENE, version -> { + Analyzer a = new FrenchAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("galician", CachingStrategy.LUCENE, version -> { + Analyzer a = new GalicianAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("german", CachingStrategy.LUCENE, version -> { + Analyzer a = new GermanAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); return analyzers; } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CzechAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CzechAnalyzerProvider.java index 27d20beef4325..9dd75fbf3b882 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CzechAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.cz.CzechAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final CzechAnalyzer analyzer; - public CzechAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + CzechAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new CzechAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, CzechAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DanishAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DanishAnalyzerProvider.java index 897997992b24c..66a789247334d 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DanishAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.da.DanishAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final DanishAnalyzer analyzer; - public DanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + DanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new DanishAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, DanishAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DutchAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DutchAnalyzerProvider.java index eaa69e939cb1e..e442ff4fa9367 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DutchAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.nl.DutchAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final DutchAnalyzer analyzer; - public DutchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + DutchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new DutchAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, DutchAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EnglishAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EnglishAnalyzerProvider.java index 952f43296ffeb..ba30d02e20b9e 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EnglishAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final EnglishAnalyzer analyzer; - public EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new EnglishAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, EnglishAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FinnishAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FinnishAnalyzerProvider.java index b914fab66fda7..ecd4e19dfad46 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FinnishAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.fi.FinnishAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final FinnishAnalyzer analyzer; - public FinnishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + FinnishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new FinnishAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, FinnishAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FrenchAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FrenchAnalyzerProvider.java index 96cdb8ed03e2c..84ec03ebe17f0 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FrenchAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.fr.FrenchAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final FrenchAnalyzer analyzer; - public FrenchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + FrenchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new FrenchAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, FrenchAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GalicianAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GalicianAnalyzerProvider.java index 1dc6de99a4d64..58bb20c7b86b1 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GalicianAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.gl.GalicianAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final GalicianAnalyzer analyzer; - public GalicianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + GalicianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new GalicianAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, GalicianAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GermanAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GermanAnalyzerProvider.java index 52a116acca5ac..65ff5fe7a5e18 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GermanAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.de.GermanAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final GermanAnalyzer analyzer; - public GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new GermanAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, GermanAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzer.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzer.java similarity index 95% rename from server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzer.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzer.java index 1a096b8fa4b9f..5dbe902fe1500 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzer.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzer.java @@ -1,4 +1,4 @@ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; /* * Licensed to Elasticsearch under one or more contributor @@ -48,12 +48,12 @@ public final class SnowballAnalyzer extends Analyzer { private CharArraySet stopSet; /** Builds the named analyzer with no stop words. 
*/ - public SnowballAnalyzer(String name) { + SnowballAnalyzer(String name) { this.name = name; } /** Builds the named analyzer with the given stop words. */ - public SnowballAnalyzer(String name, CharArraySet stopWords) { + SnowballAnalyzer(String name, CharArraySet stopWords) { this(name); stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords)); } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzerProvider.java similarity index 92% rename from server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzerProvider.java index bd3201e3c8a54..e5584ba6b6d45 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzerProvider.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.core.StopAnalyzer; @@ -26,6 +26,8 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; import java.util.HashMap; import java.util.Map; @@ -60,7 +62,7 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider>> setupAnalyzers(List analyzers.register("stop", StopAnalyzerProvider::new); analyzers.register("whitespace", WhitespaceAnalyzerProvider::new); analyzers.register("keyword", KeywordAnalyzerProvider::new); - analyzers.register("snowball", SnowballAnalyzerProvider::new); - analyzers.register("arabic", ArabicAnalyzerProvider::new); - analyzers.register("armenian", ArmenianAnalyzerProvider::new); - analyzers.register("basque", BasqueAnalyzerProvider::new); - analyzers.register("bengali", BengaliAnalyzerProvider::new); - analyzers.register("brazilian", BrazilianAnalyzerProvider::new); - analyzers.register("bulgarian", BulgarianAnalyzerProvider::new); - analyzers.register("catalan", CatalanAnalyzerProvider::new); - analyzers.register("chinese", ChineseAnalyzerProvider::new); - analyzers.register("cjk", CjkAnalyzerProvider::new); - analyzers.register("czech", CzechAnalyzerProvider::new); - analyzers.register("danish", DanishAnalyzerProvider::new); - analyzers.register("dutch", DutchAnalyzerProvider::new); - analyzers.register("english", EnglishAnalyzerProvider::new); - analyzers.register("finnish", FinnishAnalyzerProvider::new); - analyzers.register("french", FrenchAnalyzerProvider::new); - analyzers.register("galician", GalicianAnalyzerProvider::new); - analyzers.register("german", GermanAnalyzerProvider::new); analyzers.register("greek", GreekAnalyzerProvider::new); 
analyzers.register("hindi", HindiAnalyzerProvider::new); analyzers.register("hungarian", HungarianAnalyzerProvider::new); diff --git a/server/src/main/java/org/elasticsearch/indices/analysis/PreBuiltAnalyzers.java b/server/src/main/java/org/elasticsearch/indices/analysis/PreBuiltAnalyzers.java index 18cc247b84493..0e9aed3c142d9 100644 --- a/server/src/main/java/org/elasticsearch/indices/analysis/PreBuiltAnalyzers.java +++ b/server/src/main/java/org/elasticsearch/indices/analysis/PreBuiltAnalyzers.java @@ -20,37 +20,21 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CharArraySet; -import org.apache.lucene.analysis.ar.ArabicAnalyzer; -import org.apache.lucene.analysis.bg.BulgarianAnalyzer; -import org.apache.lucene.analysis.bn.BengaliAnalyzer; -import org.apache.lucene.analysis.br.BrazilianAnalyzer; -import org.apache.lucene.analysis.ca.CatalanAnalyzer; -import org.apache.lucene.analysis.cjk.CJKAnalyzer; import org.apache.lucene.analysis.ckb.SoraniAnalyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.core.SimpleAnalyzer; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; -import org.apache.lucene.analysis.cz.CzechAnalyzer; -import org.apache.lucene.analysis.da.DanishAnalyzer; -import org.apache.lucene.analysis.de.GermanAnalyzer; import org.apache.lucene.analysis.el.GreekAnalyzer; -import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.es.SpanishAnalyzer; -import org.apache.lucene.analysis.eu.BasqueAnalyzer; import org.apache.lucene.analysis.fa.PersianAnalyzer; -import org.apache.lucene.analysis.fi.FinnishAnalyzer; -import org.apache.lucene.analysis.fr.FrenchAnalyzer; import org.apache.lucene.analysis.ga.IrishAnalyzer; -import org.apache.lucene.analysis.gl.GalicianAnalyzer; import org.apache.lucene.analysis.hi.HindiAnalyzer; import org.apache.lucene.analysis.hu.HungarianAnalyzer; -import 
org.apache.lucene.analysis.hy.ArmenianAnalyzer; import org.apache.lucene.analysis.id.IndonesianAnalyzer; import org.apache.lucene.analysis.it.ItalianAnalyzer; import org.apache.lucene.analysis.lt.LithuanianAnalyzer; import org.apache.lucene.analysis.lv.LatvianAnalyzer; -import org.apache.lucene.analysis.nl.DutchAnalyzer; import org.apache.lucene.analysis.no.NorwegianAnalyzer; import org.apache.lucene.analysis.pt.PortugueseAnalyzer; import org.apache.lucene.analysis.ro.RomanianAnalyzer; @@ -61,7 +45,6 @@ import org.apache.lucene.analysis.th.ThaiAnalyzer; import org.apache.lucene.analysis.tr.TurkishAnalyzer; import org.elasticsearch.Version; -import org.elasticsearch.index.analysis.SnowballAnalyzer; import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy; import java.util.Locale; @@ -129,168 +112,6 @@ protected Analyzer create(Version version) { } }, - SNOWBALL { - @Override - protected Analyzer create(Version version) { - Analyzer analyzer = new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET); - analyzer.setVersion(version.luceneVersion); - return analyzer; - } - }, - - ARABIC { - @Override - protected Analyzer create(Version version) { - Analyzer a = new ArabicAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - ARMENIAN { - @Override - protected Analyzer create(Version version) { - Analyzer a = new ArmenianAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - BASQUE { - @Override - protected Analyzer create(Version version) { - Analyzer a = new BasqueAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - BENGALI { - @Override - protected Analyzer create(Version version) { - Analyzer a = new BengaliAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - BRAZILIAN { - @Override - protected Analyzer create(Version version) { - Analyzer a = new BrazilianAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - BULGARIAN { - 
@Override - protected Analyzer create(Version version) { - Analyzer a = new BulgarianAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - CATALAN { - @Override - protected Analyzer create(Version version) { - Analyzer a = new CatalanAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - CHINESE(CachingStrategy.ONE) { - @Override - protected Analyzer create(Version version) { - Analyzer a = new StandardAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - CJK { - @Override - protected Analyzer create(Version version) { - Analyzer a = new CJKAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - CZECH { - @Override - protected Analyzer create(Version version) { - Analyzer a = new CzechAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - DUTCH { - @Override - protected Analyzer create(Version version) { - Analyzer a = new DutchAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - DANISH { - @Override - protected Analyzer create(Version version) { - Analyzer a = new DanishAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - ENGLISH { - @Override - protected Analyzer create(Version version) { - Analyzer a = new EnglishAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - FINNISH { - @Override - protected Analyzer create(Version version) { - Analyzer a = new FinnishAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - FRENCH { - @Override - protected Analyzer create(Version version) { - Analyzer a = new FrenchAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - GALICIAN { - @Override - protected Analyzer create(Version version) { - Analyzer a = new GalicianAnalyzer(); - a.setVersion(version.luceneVersion); - return a; - } - }, - - GERMAN { - @Override - protected Analyzer create(Version version) { - Analyzer a = new GermanAnalyzer(); - 
a.setVersion(version.luceneVersion); - return a; - } - }, - GREEK { @Override protected Analyzer create(Version version) { diff --git a/server/src/test/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerTests.java b/server/src/test/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerTests.java index d0ffdbe229dd6..8c4879fd35e82 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/PreBuiltAnalyzerTests.java @@ -61,14 +61,17 @@ public void testThatInstancesAreTheSameAlwaysForKeywordAnalyzer() { } public void testThatInstancesAreCachedAndReused() { - assertSame(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT), - PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT)); - // same lucene version should be cached - assertSame(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_2_1), - PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_2_2)); - - assertNotSame(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_0_0), - PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_0_1)); + assertSame(PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT), + PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT)); + // same es version should be cached + assertSame(PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_2_1), + PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_2_1)); + assertNotSame(PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_0_0), + PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_0_1)); + + // Same Lucene version should be cached: + assertSame(PreBuiltAnalyzers.STOP.getAnalyzer(Version.V_5_2_1), + PreBuiltAnalyzers.STOP.getAnalyzer(Version.V_5_2_2)); } public void testThatAnalyzersAreUsedInMapping() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 9a6c264ce3688..51668ec21ad5b 100644 --- 
a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -55,7 +55,6 @@ import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; import org.elasticsearch.index.query.MatchPhraseQueryBuilder; import org.elasticsearch.index.query.QueryShardContext; -import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; @@ -87,6 +86,9 @@ public void setup() { .putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto")) .put("index.analysis.analyzer.synonym.tokenizer", "standard") .put("index.analysis.analyzer.synonym.filter", "mySynonyms") + // Stop filter remains in server as it is part of lucene-core + .put("index.analysis.analyzer.my_stop_analyzer.tokenizer", "standard") + .put("index.analysis.analyzer.my_stop_analyzer.filter", "stop") .build(); indexService = createIndex("test", settings); parser = indexService.mapperService().documentMapperParser(); @@ -621,7 +623,7 @@ public void testIndexPrefixIndexTypes() throws IOException { String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "standard") .startObject("index_prefixes").endObject() .field("index_options", "offsets") .endObject().endObject().endObject().endObject()); @@ -637,7 +639,7 @@ public void testIndexPrefixIndexTypes() throws IOException { String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "standard") .startObject("index_prefixes").endObject() .field("index_options", "freqs") 
.endObject().endObject().endObject().endObject()); @@ -654,7 +656,7 @@ public void testIndexPrefixIndexTypes() throws IOException { String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "standard") .startObject("index_prefixes").endObject() .field("index_options", "positions") .endObject().endObject().endObject().endObject()); @@ -675,7 +677,7 @@ public void testIndexPrefixIndexTypes() throws IOException { String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "standard") .startObject("index_prefixes").endObject() .field("term_vector", "with_positions_offsets") .endObject().endObject().endObject().endObject()); @@ -696,7 +698,7 @@ public void testIndexPrefixIndexTypes() throws IOException { String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "standard") .startObject("index_prefixes").endObject() .field("term_vector", "with_positions") .endObject().endObject().endObject().endObject()); @@ -725,7 +727,7 @@ public void testFastPhraseMapping() throws IOException { .startObject("properties") .startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "my_stop_analyzer") .field("index_phrases", true) .endObject() .startObject("synfield") @@ -742,20 +744,20 @@ public void testFastPhraseMapping() throws IOException { queryShardContext.getMapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, true); Query q = new MatchPhraseQueryBuilder("field", "two words").toQuery(queryShardContext); - assertThat(q, 
is(new PhraseQuery("field._index_phrase", "two word"))); + assertThat(q, is(new PhraseQuery("field._index_phrase", "two words"))); Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext); - assertThat(q2, is(new PhraseQuery("field._index_phrase", "three word", "word here"))); + assertThat(q2, is(new PhraseQuery("field._index_phrase", "three words", "words here"))); Query q3 = new MatchPhraseQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); - assertThat(q3, is(new PhraseQuery(1, "field", "two", "word"))); + assertThat(q3, is(new PhraseQuery(1, "field", "two", "words"))); Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext); assertThat(q4, is(new TermQuery(new Term("field", "singleton")))); Query q5 = new MatchPhraseQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); assertThat(q5, - is(new PhraseQuery.Builder().add(new Term("field", "sparkl")).add(new Term("field", "stopword"), 2).build())); + is(new PhraseQuery.Builder().add(new Term("field", "sparkle")).add(new Term("field", "stopword"), 2).build())); Query q6 = new MatchPhraseQueryBuilder("synfield", "motor car").toQuery(queryShardContext); assertThat(q6, is(new MultiPhraseQuery.Builder() @@ -778,7 +780,7 @@ public void testFastPhraseMapping() throws IOException { CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); ts.reset(); assertTrue(ts.incrementToken()); - assertEquals("some english", termAtt.toString()); + assertEquals("Some English", termAtt.toString()); } { @@ -821,7 +823,7 @@ public void testIndexPrefixMapping() throws IOException { String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "standard") .startObject("index_prefixes") .field("min_chars", 1) .field("max_chars", 10) @@ -855,7 +857,7 @@ public void 
testIndexPrefixMapping() throws IOException { String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "standard") .startObject("index_prefixes").endObject() .endObject().endObject() .endObject().endObject()); @@ -880,7 +882,7 @@ public void testIndexPrefixMapping() throws IOException { String illegalMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "standard") .startObject("index_prefixes") .field("min_chars", 1) .field("max_chars", 10) @@ -903,7 +905,7 @@ public void testIndexPrefixMapping() throws IOException { String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "standard") .startObject("index_prefixes") .field("min_chars", 11) .field("max_chars", 10) @@ -920,7 +922,7 @@ public void testIndexPrefixMapping() throws IOException { String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "standard") .startObject("index_prefixes") .field("min_chars", 0) .field("max_chars", 10) @@ -937,7 +939,7 @@ public void testIndexPrefixMapping() throws IOException { String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "standard") .startObject("index_prefixes") .field("min_chars", 1) .field("max_chars", 25) @@ -954,7 +956,7 @@ public 
void testIndexPrefixMapping() throws IOException { String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field") .field("type", "text") - .field("analyzer", "english") + .field("analyzer", "standard") .field("index_prefixes", (String) null) .endObject().endObject() .endObject().endObject()); diff --git a/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java index cbaf9e0b7e604..ae917a9499c71 100644 --- a/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java @@ -1306,7 +1306,7 @@ public void testWithStopWords() throws Exception { assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); Query query = new QueryStringQueryBuilder("the quick fox") .field(STRING_FIELD_NAME) - .analyzer("english") + .analyzer("stop") .toQuery(createShardContext()); BooleanQuery expected = new BooleanQuery.Builder() .add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), Occur.SHOULD) @@ -1319,7 +1319,7 @@ public void testWithPrefixStopWords() throws Exception { assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); Query query = new QueryStringQueryBuilder("the* quick fox") .field(STRING_FIELD_NAME) - .analyzer("english") + .analyzer("stop") .toQuery(createShardContext()); BooleanQuery expected = new BooleanQuery.Builder() .add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), Occur.SHOULD) diff --git a/server/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java index ceb75f26d7711..b51e2c22a90eb 100644 --- 
a/server/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java @@ -630,7 +630,7 @@ public void testWithStopWords() throws Exception { assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); Query query = new SimpleQueryStringBuilder("the quick fox") .field(STRING_FIELD_NAME) - .analyzer("english") + .analyzer("stop") .toQuery(createShardContext()); BooleanQuery expected = new BooleanQuery.Builder() .add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), BooleanClause.Occur.SHOULD) @@ -643,7 +643,7 @@ public void testWithPrefixStopWords() throws Exception { assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); Query query = new SimpleQueryStringBuilder("the* quick fox") .field(STRING_FIELD_NAME) - .analyzer("english") + .analyzer("stop") .toQuery(createShardContext()); BooleanQuery expected = new BooleanQuery.Builder() .add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), BooleanClause.Occur.SHOULD) diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index 717bab12ea5cb..35c5a19cc2e8c 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -22,6 +22,9 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.search.join.ScoreMode; import org.elasticsearch.action.index.IndexRequestBuilder; 
import org.elasticsearch.action.search.SearchRequestBuilder; @@ -36,6 +39,7 @@ import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; import org.elasticsearch.index.analysis.AnalyzerProvider; +import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.IdsQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; @@ -66,9 +70,11 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; +import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; import static org.elasticsearch.client.Requests.searchRequest; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; @@ -113,7 +119,7 @@ public class HighlighterSearchIT extends ESIntegTestCase { @Override protected Collection> nodePlugins() { - return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockWhitespacePlugin.class); + return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockAnalysisPlugin.class); } public void testHighlightingWithStoredKeyword() throws IOException { @@ -765,14 +771,19 @@ public void testMatchedFieldsFvhNoRequireFieldMatch() throws Exception { } private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception { + Settings.Builder settings = Settings.builder(); + settings.put(indexSettings()); + settings.put("index.analysis.analyzer.mock_english.tokenizer", "standard"); + settings.put("index.analysis.analyzer.mock_english.filter", "mock_snowball"); assertAcked(prepareCreate("test") + .setSettings(settings) .addMapping("type1", XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties") .startObject("foo") .field("type", "text") .field("term_vector", 
"with_positions_offsets") .field("store", true) - .field("analyzer", "english") + .field("analyzer", "mock_english") .startObject("fields") .startObject("plain") .field("type", "text") @@ -785,7 +796,7 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception .field("type", "text") .field("term_vector", "with_positions_offsets") .field("store", true) - .field("analyzer", "english") + .field("analyzer", "mock_english") .startObject("fields") .startObject("plain") .field("type", "text") @@ -2819,7 +2830,7 @@ public void testSynonyms() throws IOException { assertAcked(prepareCreate("test").setSettings(builder.build()) .addMapping("type1", "field1", "type=text,term_vector=with_positions_offsets,search_analyzer=synonym," + - "analyzer=english,index_options=offsets")); + "analyzer=standard,index_options=offsets")); ensureGreen(); client().prepareIndex("test", "type1", "0").setSource( @@ -2983,7 +2994,39 @@ public void testWithNormalizer() throws Exception { } } - public static class MockWhitespacePlugin extends Plugin implements AnalysisPlugin { + public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin { + + public final class MockSnowBall extends TokenFilter { + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + + /** Sole constructor. 
*/ + MockSnowBall(TokenStream in) { + super(in); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + final char[] buffer = termAtt.buffer(); + final int length = termAtt.length(); + if (buffer[length - 1] == 's') { + termAtt.setLength(length - 1); + } + if (length > 3) { + if (buffer[length - 1] == 'g' && buffer[length - 2] == 'n' && buffer[length - 3] == 'i') { + termAtt.setLength(length- 3); + } + } + return true; + } else + return false; + } + } + + @Override + public List getPreConfiguredTokenFilters() { + return singletonList(PreConfiguredTokenFilter.singleton("mock_snowball", false, MockSnowBall::new)); + } @Override public Map>> getAnalyzers() { diff --git a/server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java b/server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java index 502b10e9a43dd..3ecb34861eb06 100644 --- a/server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java +++ b/server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java @@ -19,17 +19,32 @@ package org.elasticsearch.search.query; +import org.apache.lucene.analysis.CharacterUtils; +import org.apache.lucene.analysis.MockLowerCaseFilter; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.analysis.CharFilterFactory; +import 
org.elasticsearch.index.analysis.MultiTermAwareComponent; +import org.elasticsearch.index.analysis.PreConfiguredCharFilter; +import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; +import org.elasticsearch.index.analysis.TokenizerFactory; +import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.Operator; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.SimpleQueryStringFlag; +import org.elasticsearch.indices.analysis.AnalysisModule; +import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; @@ -38,14 +53,19 @@ import org.elasticsearch.test.InternalSettingsPlugin; import java.io.IOException; +import java.io.Reader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutionException; +import java.util.function.Function; +import static java.util.Collections.singletonList; +import static java.util.Collections.singletonMap; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.index.query.QueryBuilders.boolQuery; import static org.elasticsearch.index.query.QueryBuilders.simpleQueryStringQuery; @@ -68,11 +88,15 @@ public class SimpleQueryStringIT extends ESIntegTestCase { @Override protected Collection> nodePlugins() { - return Arrays.asList(InternalSettingsPlugin.class); // uses index.version.created + return Arrays.asList(MockAnalysisPlugin.class, InternalSettingsPlugin.class); // uses index.version.created } public void testSimpleQueryString() throws ExecutionException, InterruptedException { - createIndex("test"); + Settings.Builder settings = Settings.builder(); + settings.put(indexSettings()); + 
settings.put("index.analysis.analyzer.mock_snowball.tokenizer", "standard"); + settings.put("index.analysis.analyzer.mock_snowball.filter", "mock_snowball"); + createIndex("test", settings.build()); indexRandom(true, false, client().prepareIndex("test", "type1", "1").setSource("body", "foo"), client().prepareIndex("test", "type1", "2").setSource("body", "bar"), @@ -104,7 +128,7 @@ public void testSimpleQueryString() throws ExecutionException, InterruptedExcept assertSearchHits(searchResponse, "4", "5"); searchResponse = client().prepareSearch().setQuery( - simpleQueryStringQuery("eggplants").analyzer("snowball")).get(); + simpleQueryStringQuery("eggplants").analyzer("mock_snowball")).get(); assertHitCount(searchResponse, 1L); assertFirstHit(searchResponse, hasId("4")); @@ -308,7 +332,7 @@ public void testSimpleQueryStringAnalyzeWildcard() throws ExecutionException, In .startObject("properties") .startObject("location") .field("type", "text") - .field("analyzer", "german") + .field("analyzer", "standard") .endObject() .endObject() .endObject() @@ -569,4 +593,33 @@ private void assertHits(SearchHits hits, String... ids) { } assertThat(hitIds, containsInAnyOrder(ids)); } + + public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin { + + public final class MockSnowBall extends TokenFilter { + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + + /** Sole constructor. 
*/ + MockSnowBall(TokenStream in) { + super(in); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + char[] buffer = termAtt.buffer(); + if (buffer[termAtt.length() - 1] == 's') { + termAtt.setLength(termAtt.length() - 1); + } + return true; + } else + return false; + } + } + + @Override + public List getPreConfiguredTokenFilters() { + return singletonList(PreConfiguredTokenFilter.singleton("mock_snowball", false, MockSnowBall::new)); + } + } } diff --git a/server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java b/server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java index 66fdf81744410..36902b55f5688 100644 --- a/server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java +++ b/server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java @@ -87,7 +87,7 @@ public void testExplainValidateQueryTwoNodes() throws IOException { .setSource(XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("properties") .startObject("foo").field("type", "text").endObject() .startObject("bar").field("type", "integer").endObject() - .startObject("baz").field("type", "text").field("analyzer", "snowball").endObject() + .startObject("baz").field("type", "text").field("analyzer", "standard").endObject() .startObject("pin").startObject("properties").startObject("location").field("type", "geo_point").endObject().endObject().endObject() .endObject().endObject().endObject()) .execute().actionGet();