Fix failing ICU tests (#35207)

Fixes #35173
elastic · Nov 6, 2018 · 9f4b93f · 9f4b93f
1 parent 833e0f8
commit 9f4b93f
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 41 deletions.
diff --git a/plugins/analysis-icu/build.gradle b/plugins/analysis-icu/build.gradle
@@ -40,9 +40,3 @@ dependencyLicenses {
   mapping from: /lucene-.*/, to: 'lucene'
 }
 
-// Muted: https://github.com/elastic/elasticsearch/issues/35173
-integTestRunner {
-    systemProperty 'tests.rest.blacklist', 
-        'analysis_icu/10_basic/Normalization with a UnicodeSet Filter,' +
-          'analysis_icu/10_basic/Normalization with a CamcelCase UnicodeSet Filter'
-}
diff --git a/...s-icu/src/main/java/org/elasticsearch/index/analysis/IcuNormalizerTokenFilterFactory.java b/...s-icu/src/main/java/org/elasticsearch/index/analysis/IcuNormalizerTokenFilterFactory.java
@@ -38,8 +38,10 @@
  * <p>The {@code unicodeSetFilter} attribute can be used to provide the UnicodeSet for filtering.</p>
  */
 public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
+
     private static final DeprecationLogger deprecationLogger =
         new DeprecationLogger(LogManager.getLogger(IcuNormalizerTokenFilterFactory.class));
+
     private final Normalizer2 normalizer;
 
     public IcuNormalizerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {

diff --git a/plugins/analysis-icu/src/test/resources/rest-api-spec/test/analysis_icu/10_basic.yml b/plugins/analysis-icu/src/test/resources/rest-api-spec/test/analysis_icu/10_basic.yml
@@ -46,7 +46,7 @@
     - match:  { tokens.1.token: bar }
     - match:  { tokens.2.token: resume }
 ---
-"Normalization with a UnicodeSet Filter":
+"Normalization with unicode_set_filter":
     - do:
         indices.create:
             index:  test
@@ -70,31 +70,42 @@
           index:    test
           body:
             char_filter: ["charfilter_icu_normalizer"]
-            tokenizer:  keyword
+            tokenizer:  standard
             text:     charfilter Föo Bâr Ruß
-    - length: { tokens: 1 }
-    - match:  { tokens.0.token: charfilter föo bâr ruß }
+    - length: { tokens: 4 }
+    - match:  { tokens.0.token: charfilter }
+    - match:  { tokens.1.token: föo }
+    - match:  { tokens.2.token: bâr }
+    - match:  { tokens.3.token: ruß }
+
     - do:
         indices.analyze:
           index:    test
           body:
-            tokenizer:  keyword
+            tokenizer:  standard
             filter: ["tokenfilter_icu_normalizer"]
             text:     tokenfilter Föo Bâr Ruß
-    - length: { tokens: 1 }
-    - match:  { tokens.0.token: tokenfilter föo Bâr ruß }
+    - length: { tokens: 4 }
+    - match:  { tokens.0.token: tokenfilter }
+    - match:  { tokens.1.token: föo }
+    - match:  { tokens.2.token: Bâr }
+    - match:  { tokens.3.token: ruß }
+
     - do:
         indices.analyze:
           index:    test
           body:
-            tokenizer:  keyword
+            tokenizer:  standard
             filter: ["tokenfilter_icu_folding"]
             text:     icufolding Föo Bâr Ruß
-    - length: { tokens: 1 }
-    - match:  { tokens.0.token: icufolding foo bâr russ }
+    - length: { tokens: 4 }
+    - match:  { tokens.0.token: icufolding }
+    - match:  { tokens.1.token: foo }
+    - match:  { tokens.2.token: bâr }
+    - match:  { tokens.3.token: russ }
 
 ---
-"Normalization with a CamcelCase UnicodeSet Filter":
+"Normalization with deprecated unicodeSetFilter":
     - skip:
         version: " - 6.99.99"
         reason:  unicodeSetFilter deprecated in 7.0.0, replaced by unicode_set_filter
@@ -121,6 +132,8 @@
                                     type: icu_folding
                                     unicodeSetFilter: "[^â]"
     - do:
+        warnings:
+        - "[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]"
         indices.analyze:
           index:    test
           body:
@@ -132,27 +145,4 @@
     - match:  { tokens.1.token: föo }
     - match:  { tokens.2.token: bâr }
     - match:  { tokens.3.token: ruß }
-    - do:
-        indices.analyze:
-          index:    test
-          body:
-            tokenizer:  standard
-            filter: ["tokenfilter_icu_normalizer"]
-            text:     tokenfilter Föo Bâr Ruß
-    - length: { tokens: 4 }
-    - match:  { tokens.0.token: tokenfilter }
-    - match:  { tokens.1.token: föo }
-    - match:  { tokens.2.token: Bâr }
-    - match:  { tokens.3.token: ruß }
-    - do:
-        indices.analyze:
-          index:    test
-          body:
-            tokenizer:  standard
-            filter: ["tokenfilter_icu_folding"]
-            text:     icufolding Föo Bâr Ruß
-    - length: { tokens: 4 }
-    - match:  { tokens.0.token: icufolding }
-    - match:  { tokens.1.token: foo }
-    - match:  { tokens.2.token: bâr }
-    - match:  { tokens.3.token: russ }
+