diff --git a/plugins/analysis-icu/build.gradle b/plugins/analysis-icu/build.gradle index 96a5e8d48370b..90132e2c58fcd 100644 --- a/plugins/analysis-icu/build.gradle +++ b/plugins/analysis-icu/build.gradle @@ -40,9 +40,3 @@ dependencyLicenses { mapping from: /lucene-.*/, to: 'lucene' } -// Muted: https://github.com/elastic/elasticsearch/issues/35173 -integTestRunner { - systemProperty 'tests.rest.blacklist', - 'analysis_icu/10_basic/Normalization with a UnicodeSet Filter,' + - 'analysis_icu/10_basic/Normalization with a CamcelCase UnicodeSet Filter' -} diff --git a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuNormalizerTokenFilterFactory.java b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuNormalizerTokenFilterFactory.java index d3e59bf9488eb..73bf92ee872a5 100644 --- a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuNormalizerTokenFilterFactory.java +++ b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuNormalizerTokenFilterFactory.java @@ -38,8 +38,10 @@ *

The {@code unicodeSetFilter} attribute can be used to provide the UnicodeSet for filtering.

*/ public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent { + private static final DeprecationLogger deprecationLogger = new DeprecationLogger(LogManager.getLogger(IcuNormalizerTokenFilterFactory.class)); + private final Normalizer2 normalizer; public IcuNormalizerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { diff --git a/plugins/analysis-icu/src/test/resources/rest-api-spec/test/analysis_icu/10_basic.yml b/plugins/analysis-icu/src/test/resources/rest-api-spec/test/analysis_icu/10_basic.yml index bb2cf97a897d3..5cdfcde72b020 100644 --- a/plugins/analysis-icu/src/test/resources/rest-api-spec/test/analysis_icu/10_basic.yml +++ b/plugins/analysis-icu/src/test/resources/rest-api-spec/test/analysis_icu/10_basic.yml @@ -46,7 +46,7 @@ - match: { tokens.1.token: bar } - match: { tokens.2.token: resume } --- -"Normalization with a UnicodeSet Filter": +"Normalization with unicode_set_filter": - do: indices.create: index: test @@ -70,31 +70,42 @@ index: test body: char_filter: ["charfilter_icu_normalizer"] - tokenizer: keyword + tokenizer: standard text: charfilter Föo Bâr Ruß - - length: { tokens: 1 } - - match: { tokens.0.token: charfilter föo bâr ruß } + - length: { tokens: 4 } + - match: { tokens.0.token: charfilter } + - match: { tokens.1.token: föo } + - match: { tokens.2.token: bâr } + - match: { tokens.3.token: ruß } + - do: indices.analyze: index: test body: - tokenizer: keyword + tokenizer: standard filter: ["tokenfilter_icu_normalizer"] text: tokenfilter Föo Bâr Ruß - - length: { tokens: 1 } - - match: { tokens.0.token: tokenfilter föo Bâr ruß } + - length: { tokens: 4 } + - match: { tokens.0.token: tokenfilter } + - match: { tokens.1.token: föo } + - match: { tokens.2.token: Bâr } + - match: { tokens.3.token: ruß } + - do: indices.analyze: index: test body: - tokenizer: keyword + tokenizer: standard filter: ["tokenfilter_icu_folding"] text: 
icufolding Föo Bâr Ruß - - length: { tokens: 1 } - - match: { tokens.0.token: icufolding foo bâr russ } + - length: { tokens: 4 } + - match: { tokens.0.token: icufolding } + - match: { tokens.1.token: foo } + - match: { tokens.2.token: bâr } + - match: { tokens.3.token: russ } --- -"Normalization with a CamcelCase UnicodeSet Filter": +"Normalization with deprecated unicodeSetFilter": - skip: version: " - 6.99.99" reason: unicodeSetFilter deprecated in 7.0.0, replaced by unicode_set_filter @@ -121,6 +132,8 @@ type: icu_folding unicodeSetFilter: "[^â]" - do: + warnings: + - "[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]" indices.analyze: index: test body: @@ -132,27 +145,4 @@ - match: { tokens.1.token: föo } - match: { tokens.2.token: bâr } - match: { tokens.3.token: ruß } - - do: - indices.analyze: - index: test - body: - tokenizer: standard - filter: ["tokenfilter_icu_normalizer"] - text: tokenfilter Föo Bâr Ruß - - length: { tokens: 4 } - - match: { tokens.0.token: tokenfilter } - - match: { tokens.1.token: föo } - - match: { tokens.2.token: Bâr } - - match: { tokens.3.token: ruß } - - do: - indices.analyze: - index: test - body: - tokenizer: standard - filter: ["tokenfilter_icu_folding"] - text: icufolding Föo Bâr Ruß - - length: { tokens: 4 } - - match: { tokens.0.token: icufolding } - - match: { tokens.1.token: foo } - - match: { tokens.2.token: bâr } - - match: { tokens.3.token: russ } +